Simulation
Below we simulate data from the vector autoregressive (VAR) model of order 1 illustrated in Figure 1 of the main manuscript. These data are used to check whether the cross-lagged panel model (CLPM) in SEM, the repeated multiple regression method, and the SNMM with G-estimation were correctly implemented in the R code. The simulations were also used to assess the sensitivity of the model results to different sufficient adjustment sets.
These simulations are not mentioned in the main paper and mainly served as a check for the authors. We nevertheless include them in the supplementary materials, as readers may find them useful for building some intuition about the methods.
Data generating mechanism
Data are generated from a vector autoregressive (VAR) model of order 1, as illustrated in Figure 1 of the main manuscript. The relationships between variables are described by polynomials of degree \(k\), and the residuals are normally distributed. Baseline covariates \(C\) are binomial random variables.
Population parameter values for the separate relationships can be found in the R code below. For a linear data generating mechanism (\(k = 1\)), the population value of the controlled direct effect (CDE) of \(X_{0} \rightarrow Y_{3}\) is 0.05; the CDE of \(X_{1} \rightarrow Y_{3}\) is 0.1; and the CDE of \(X_{2} \rightarrow Y_{3}\) is 0.2.
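These population values follow from the path coefficients used in the code below: each cross-lagged \(X \rightarrow Y\) path has coefficient 0.2 and each autoregressive \(Y \rightarrow Y\) path has coefficient 0.5, so the CDEs are simply products of these coefficients. A quick check in R (the object names b_xy and b_yy are only illustrative):
b_xy <- 0.2  # cross-lagged effect X_t -> Y_{t+1}
b_yy <- 0.5  # autoregressive effect Y_t -> Y_{t+1}
c(CDE_0 = b_xy * b_yy^2, CDE_1 = b_xy * b_yy, CDE_2 = b_xy)
# 0.05 0.10 0.20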
simulate_data_test <- function(n) {
  # Simulation factors: exponents k of the polynomial terms below
  # (here set to 0, which reduces those terms to constants)
  kth_bX <- 0
  kth_bY <- 0
  # Baseline variables
  C1_b <- rbinom(n, 1, .5)
  C2_b <- rbinom(n, 1, .1)
  C3_b <- rbinom(n, 1, .4)
  Y_b <- rnorm(n, 5, .5)
  X_b <- rnorm(n, 5, .5)
  # Stationary process
  Y_0 <- 3 + 0.5 * Y_b + 0.2 * X_b + 0.5 * C1_b +
    0.5 * I((Y_b - mean(Y_b))^kth_bY) + 1 * I((X_b - mean(X_b))^kth_bX) + rnorm(n, sd = .5)
  X_0 <- 3 + 0.2 * Y_b + 0.5 * X_b + 0.5 * C1_b + rnorm(n, sd = .5)
  Y_1 <- 3 + 0.5 * Y_0 + 0.2 * X_0 + 0.5 * C1_b +
    0.5 * I((Y_0 - mean(Y_0))^kth_bY) + 1 * I((X_0 - mean(X_0))^kth_bX) + rnorm(n, sd = .5)
  X_1 <- 3 + 0.2 * Y_0 + 0.5 * X_0 + 0.5 * C1_b + rnorm(n, sd = .5)
  Y_2 <- 3 + 0.5 * Y_1 + 0.2 * X_1 + 0.5 * C1_b +
    0.5 * I((Y_1 - mean(Y_1))^kth_bY) + 1 * I((X_1 - mean(X_1))^kth_bX) + rnorm(n, sd = .5)
  X_2 <- 3 + 0.2 * Y_1 + 0.5 * X_1 + 0.5 * C1_b + rnorm(n, sd = .5)
  Y_3 <- 3 + 0.5 * Y_2 + 0.2 * X_2 + 0.5 * C1_b +
    0.5 * I((Y_2 - mean(Y_2))^kth_bY) + 1 * I((X_2 - mean(X_2))^kth_bX) + rnorm(n, sd = .5)
  X_3 <- 3 + 0.2 * Y_2 + 0.5 * X_2 + 0.5 * C1_b + rnorm(n, sd = .5)
  # Diagnostic plots (commented out so that the replication loops below
  # do not draw a new plot on every call)
  # plot(Y_0, X_1)
  # plot(X_1, X_2)
  dat_test <- data.frame(
    C1_b, C2_b, C3_b, X_b, Y_b,
    X_0, Y_0,
    X_1, Y_1,
    X_2, Y_2,
    X_3, Y_3
  )
  return(dat_test)
}

Cross-lagged panel modeling using SEM
m_CLPM_test <- '
# Estimate lagged effects
X_1 ~ X_0 + Y_0 + C1_b
Y_1 ~ Y_0 + b_y1x0*X_0 + C1_b
X_2 ~ X_1 + Y_1 + C1_b
Y_2 ~ b_y2x1*X_1 + b_y2y1*Y_1 + C1_b
X_3 ~ X_2 + Y_2 + C1_b
Y_3 ~ b_y3x2*X_2 + b_y3y2*Y_2 + C1_b
# Estimate (co)variance at first wave
X_0 ~~ X_0
Y_0 ~~ Y_0
X_0 ~~ Y_0
# Estimate residual (co)variances
X_1 ~~ X_1
Y_1 ~~ Y_1
X_1 ~~ Y_1
X_2 ~~ X_2
Y_2 ~~ Y_2
X_2 ~~ Y_2
X_3 ~~ X_3
Y_3 ~~ Y_3
X_3 ~~ Y_3
# Define joint effects
CDE_0 := b_y1x0 * b_y2y1 * b_y3y2
CDE_1 := b_y2x1 * b_y3y2
CDE_2 := b_y3x2
'
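The lavaan call below assumes that a simulated data set dat_test is available in the workspace; a minimal sketch for creating one (the seed is arbitrary and only included here for reproducibility):
library(lavaan)
set.seed(1234)                        # arbitrary seed
dat_test <- simulate_data_test(1000)  # same n as in the replication loops below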
fit_CLPM_test <- lavaan(
  model = m_CLPM_test,
  data = dat_test,
  missing = "ML",
  meanstructure = TRUE,
  int.ov.free = TRUE,
  orthogonal.x = FALSE,
  se = "bootstrap",
  bootstrap = 99,
  parallel = "snow",
  ncpus = 7
)
summary(fit_CLPM_test)
parameterEstimates(fit_CLPM_test, boot.ci.type = "perc")
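To compare the SEM-based estimates directly with the population values, the defined parameters (the rows of the parameter table with operator ":=") can be extracted from the output; a small sketch:
est_CLPM <- parameterEstimates(fit_CLPM_test, boot.ci.type = "perc")
est_CLPM[est_CLPM$op == ":=", c("label", "est", "ci.lower", "ci.upper")]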
Repeated multiple regression
# Test multiple regression method with L_{t} in adjustment set ----
n_replications <- 500
out_test_MR_Lt <- matrix(NA, nrow = n_replications, ncol = 3)
colnames(out_test_MR_Lt) <- c("CDE0", "CDE1", "CDE2")
for (i in 1:n_replications) {
  dat_test <- simulate_data_test(1000)
  fit_CDE2_test_Lt <- glm(Y_3 ~ X_2 + Y_2 + C1_b, data = dat_test)
  fit_CDE1_test_Lt <- glm(Y_3 ~ X_1 + Y_1 + C1_b + X_2, data = dat_test)
  fit_CDE0_test_Lt <- glm(Y_3 ~ X_0 + Y_0 + C1_b + X_1 + X_2, data = dat_test)
  out_test_MR_Lt[i, "CDE0"] <- coef(fit_CDE0_test_Lt)["X_0"]
  out_test_MR_Lt[i, "CDE1"] <- coef(fit_CDE1_test_Lt)["X_1"]
  out_test_MR_Lt[i, "CDE2"] <- coef(fit_CDE2_test_Lt)["X_2"]
}
colMeans(out_test_MR_Lt)
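Comparing the Monte Carlo means with the population CDEs stated above gives a rough indication of bias; for example:
colMeans(out_test_MR_Lt) - c(CDE0 = 0.05, CDE1 = 0.10, CDE2 = 0.20)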
# Test multiple regression method with L_{t - 1} in adjustment set----
out_test_MR_Ltmin1 <- matrix(NA, nrow = n_replications, ncol = 3)
colnames(out_test_MR_Ltmin1) <- c("CDE0", "CDE1", "CDE2")
for (i in 1:n_replications) {
  dat_test <- simulate_data_test(1000)
  fit_CDE2_test_Ltmin1 <- glm(Y_3 ~ X_2 + X_1 + Y_1 + C1_b, data = dat_test)
  fit_CDE1_test_Ltmin1 <- glm(Y_3 ~ X_1 + X_0 + Y_0 + C1_b + X_2, data = dat_test)
  fit_CDE0_test_Ltmin1 <- glm(Y_3 ~ X_0 + C1_b + X_1 + X_2, data = dat_test)
  out_test_MR_Ltmin1[i, "CDE0"] <- coef(fit_CDE0_test_Ltmin1)["X_0"]
  out_test_MR_Ltmin1[i, "CDE1"] <- coef(fit_CDE1_test_Ltmin1)["X_1"]
  out_test_MR_Ltmin1[i, "CDE2"] <- coef(fit_CDE2_test_Ltmin1)["X_2"]
}
colMeans(out_test_MR_Ltmin1)

SNMM using G-estimation
# Test G-estimation with L_{t} in adjustment set----
out_test_Gest_Lt <- matrix(NA, nrow = n_replications, ncol = 3)
colnames(out_test_Gest_Lt) <- c("CDE0", "CDE1", "CDE2")
for (i in 1:n_replications) {
  # Generate data
  dat_test <- simulate_data_test(1000)
  # Exposure models with Lag0 covariates
  fit_PS_X2 <- glm(X_2 ~ Y_2 + C1_b, data = dat_test)
  fit_PS_X1 <- glm(X_1 ~ Y_1 + C1_b, data = dat_test)
  fit_PS_X0 <- glm(X_0 ~ Y_0 + C1_b, data = dat_test)
  # Calculate predicted exposure
  dat_test$PS2 <- predict.glm(fit_PS_X2, type = "response")
  dat_test$PS1 <- predict.glm(fit_PS_X1, type = "response")
  dat_test$PS0 <- predict.glm(fit_PS_X0, type = "response")
  # Outcome models: sequential G-estimation, working backwards from the last exposure
  fit_CDE2 <- glm(Y_3 ~ X_2 + Y_2 + C1_b + PS2, data = dat_test)
  dat_test$H2 <- dat_test$Y_3 - coef(fit_CDE2)["X_2"] * dat_test$X_2 # Create blipped-down version of Y
  fit_CDE1 <- glm(H2 ~ X_1 + Y_1 + C1_b + PS1, data = dat_test)
  dat_test$H1 <- dat_test$H2 - coef(fit_CDE1)["X_1"] * dat_test$X_1 # Create blipped-down version of Y
  fit_CDE0 <- glm(H1 ~ X_0 + Y_0 + C1_b + PS0, data = dat_test)
  out_test_Gest_Lt[i, "CDE0"] <- coef(fit_CDE0)["X_0"]
  out_test_Gest_Lt[i, "CDE1"] <- coef(fit_CDE1)["X_1"]
  out_test_Gest_Lt[i, "CDE2"] <- coef(fit_CDE2)["X_2"]
}
colMeans(out_test_Gest_Lt)
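Besides the average point estimates, the spread of the estimates across replications gives an impression of the Monte Carlo error; for example:
apply(out_test_Gest_Lt, 2, sd)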
# Test G-estimation with L_{t - 1} in adjustment set----
out_test_Gest_Ltmin1 <- matrix(NA, nrow = n_replications, ncol = 3)
colnames(out_test_Gest_Ltmin1) <- c("CDE0", "CDE1", "CDE2")
for (i in 1:n_replications) {
  # Generate data
  dat_test <- simulate_data_test(1000)
  # Exposure models with Lag1 covariates
  fit_PS_X2 <- glm(X_2 ~ X_1 + Y_1 + C1_b, data = dat_test)
  fit_PS_X1 <- glm(X_1 ~ X_0 + Y_0 + C1_b, data = dat_test)
  fit_PS_X0 <- glm(X_0 ~ C1_b, data = dat_test)
  # Calculate predicted exposure
  dat_test$PS2 <- predict.glm(fit_PS_X2, type = "response")
  dat_test$PS1 <- predict.glm(fit_PS_X1, type = "response")
  dat_test$PS0 <- predict.glm(fit_PS_X0, type = "response")
  # Outcome models: sequential G-estimation, working backwards from the last exposure
  fit_CDE2 <- glm(Y_3 ~ X_2 + X_1 + Y_1 + C1_b + PS2, data = dat_test)
  dat_test$H2 <- dat_test$Y_3 - coef(fit_CDE2)["X_2"] * dat_test$X_2 # Create blipped-down version of Y
  fit_CDE1 <- glm(H2 ~ X_1 + X_0 + Y_0 + C1_b + PS1, data = dat_test)
  dat_test$H1 <- dat_test$H2 - coef(fit_CDE1)["X_1"] * dat_test$X_1 # Create blipped-down version of Y
  fit_CDE0 <- glm(H1 ~ X_0 + C1_b + PS0, data = dat_test)
  out_test_Gest_Ltmin1[i, "CDE0"] <- coef(fit_CDE0)["X_0"]
  out_test_Gest_Ltmin1[i, "CDE1"] <- coef(fit_CDE1)["X_1"]
  out_test_Gest_Ltmin1[i, "CDE2"] <- coef(fit_CDE2)["X_2"]
}
colMeans(out_test_Gest_Ltmin1)
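Finally, the Monte Carlo means of the four regression-based checks can be collected in a single overview and set against the population CDEs of 0.05, 0.1, and 0.2; a small sketch:
rbind(
  MR_Lt       = colMeans(out_test_MR_Lt),
  MR_Ltmin1   = colMeans(out_test_MR_Ltmin1),
  Gest_Lt     = colMeans(out_test_Gest_Lt),
  Gest_Ltmin1 = colMeans(out_test_Gest_Ltmin1)
)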