Corrected Standard Errors
corrected-se.Rmd
This vignette shows how to obtain Taylor-series corrected standard errors in 2S-PA, which account for the uncertainty in the weights used to obtain the factor scores. The method is described in this paper.
The second-stage estimation treats the loading matrix and the error covariance matrix of the factor scores, which serve as indicators of the true latent variables, as known. However, these matrices are estimates from the first-stage measurement model. When the sample size is large and/or the reliability of the factor scores is high, the uncertainty in the estimated loadings and error covariances is generally negligible. Otherwise, a first-order correction to the standard errors of the second-stage estimation is possible, as illustrated below.
First-order correction for SE
$$\hat{V}_c(\hat{\boldsymbol{\gamma}}) = \hat{V}(\hat{\boldsymbol{\gamma}}) + \boldsymbol{J} \, \hat{V}(\hat{\boldsymbol{\tau}}) \, \boldsymbol{J}^\top,$$

where $\boldsymbol{J} = \partial \hat{\boldsymbol{\gamma}} / \partial \hat{\boldsymbol{\tau}}$ is the Jacobian matrix of the second-stage estimates $\hat{\boldsymbol{\gamma}}$ with respect to the first-stage estimates $\hat{\boldsymbol{\tau}}$ (the loadings and error variances of the factor scores), or

$$\boldsymbol{J} = -\boldsymbol{H}^{-1}(\hat{\boldsymbol{\gamma}}) \, \frac{\partial^2 \ell}{\partial \boldsymbol{\gamma} \, \partial \boldsymbol{\tau}^\top},$$

where $\hat{V}(\hat{\boldsymbol{\gamma}})$ is the naive covariance matrix of the structural parameter estimates assuming the measurement parameters $\boldsymbol{\tau}$ are known, $\boldsymbol{H}(\hat{\boldsymbol{\gamma}})$ is the Hessian matrix of the log-likelihood with respect to $\boldsymbol{\gamma}$, and $\hat{V}(\hat{\boldsymbol{\tau}})$ can be obtained from the first-stage measurement model analysis.
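In code, the correction simply adds a delta-method term to the naive covariance matrix. The snippet below is only a schematic sketch with arbitrary placeholder values (vc_naive, jac, and v_tau are not real estimates); it shows the matrix algebra that vcov_corrected() carries out.
# Schematic only: arbitrary placeholder values, not real estimates
vc_naive <- matrix(c(0.10, 0.00, 0.00, 0.05), nrow = 2)  # naive covariance of structural estimates
jac <- matrix(rnorm(2 * 4, sd = 0.1), nrow = 2)          # Jacobian w.r.t. first-stage estimates
v_tau <- diag(0.01, 4)                                   # covariance of first-stage estimates
vc_corrected <- vc_naive + jac %*% v_tau %*% t(jac)      # first-order corrected covariance
sqrt(diag(vc_corrected))                                 # corrected standard errors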
This example is based on the Political Democracy data used in the lavaan tutorial (https://lavaan.ugent.be/tutorial/sem.html) and in Bollen's (1989) book.
Separate Measurement Models
model <- '
  # latent variable definitions
    ind60 =~ x1 + x2 + x3
    dem60 =~ y1 + y2 + y3 + y4
  # regressions
    dem60 ~ ind60
'
cfa_ind60 <- cfa("ind60 =~ x1 + x2 + x3", data = PoliticalDemocracy)
# Regression factor scores
fs1 <- get_fs_lavaan(cfa_ind60, vfsLT = TRUE)
# Delta method variance of (loading, error variance)
vldev1 <- attr(fs1, which = "vfsLT")
cfa_dem60 <- cfa("dem60 =~ y1 + y2 + y3 + y4",
                 data = PoliticalDemocracy)
# Regression factor scores
fs2 <- get_fs_lavaan(cfa_dem60, vfsLT = TRUE)
# Delta method variance of (loading, error variance)
vldev2 <- attr(fs2, which = "vfsLT")
fs_dat <- data.frame(
    fs_ind60 = fs1$fs_ind60,
    fs_dem60 = fs2$fs_dem60
)
# Combine the sampling variances of the loadings and error variances
# Note: reorder so that the loadings come first, then the error variances
vldev <- block_diag(vldev1, vldev2)[c(1, 3, 2, 4), c(1, 3, 2, 4)]
# 2S-PA
# Assemble loadings
ld <- block_diag(attr(fs1, which = "fsL"),
                 attr(fs2, which = "fsL"))
# Assemble error covariances of the factor scores
ev <- block_diag(attr(fs1, which = "fsT"),
                 attr(fs2, which = "fsT"))
tspa_fit <- tspa(model = "dem60 ~ ind60",
                 data = fs_dat,
                 fsL = ld,
                 fsT = ev)
# Unadjusted covariance
vcov(tspa_fit)
#> d60~60 i60~~6 d60~~6
#> dem60~ind60 0.131
#> ind60~~ind60 -0.001 0.006
#> dem60~~dem60 -0.006 0.000 0.477
# Adjusted covariance matrix
# (which_free indicates which elements of fsL and fsT are free)
(vc_cor <- vcov_corrected(tspa_fit, vfsLT = vldev, which_free = c(1, 4, 5, 7)))
#> d60~60 i60~~6 d60~~6
#> dem60~ind60 0.133
#> ind60~~ind60 -0.001 0.006
#> dem60~~dem60 0.001 0.000 0.518
# Corrected standard errors
sqrt(diag(vc_cor))
#> dem60~ind60 ind60~~ind60 dem60~~dem60
#> 0.36415174 0.07585532 0.71996886
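A quick way to see the effect of the correction is to line up the naive and corrected standard errors; the snippet below is a small convenience comparison using the objects created above.
# Naive vs. corrected standard errors
cbind(naive = sqrt(diag(vcov(tspa_fit))),
      corrected = sqrt(diag(vc_cor)))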
Standardized solution
# Refit 2S-PA with the loadings and error variances set to ld_ev,
# and return the standardized dem60 ~ ind60 coefficient
tspa_est_std <- function(ld_ev) {
    ld1 <- ld
    ev1 <- ev
    ld1[c(1, 4)] <- ld_ev[1:2]
    ev1[c(1, 4)] <- ld_ev[3:4]
    tfit <- tspa(model = "dem60 ~ ind60",
                 data = fs_dat,
                 fsL = ld1,
                 fsT = ev1)
    standardizedSolution(tfit)$est.std[8]
}
tspa_est_std(c(ld[c(1, 4)], ev[c(1, 4)]))
#> [1] 0.4531693
# Jacobian of the standardized estimate with respect to the loadings and error variances
jac_std <- numDeriv::jacobian(tspa_est_std,
                              x = c(ld[c(1, 4)], ev[c(1, 4)]))
# Corrected standard error
sqrt(
    standardizedSolution(tspa_fit)[8, "se"]^2 +
        jac_std %*% vldev %*% t(jac_std)
)
#> [,1]
#> [1,] 0.1013887
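If an approximate confidence interval for the standardized coefficient is needed, the corrected standard error can be plugged into the usual normal-theory formula. This is a sketch based on the quantities just computed.
# Corrected SE for the standardized dem60 ~ ind60 coefficient
se_std_c <- sqrt(
    standardizedSolution(tspa_fit)[8, "se"]^2 +
        jac_std %*% vldev %*% t(jac_std)
)
# Approximate 95% confidence interval
standardizedSolution(tspa_fit)[8, "est.std"] +
    c(-1, 1) * qnorm(.975) * as.numeric(se_std_c)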
Compare to joint structural and measurement model
sem_fit <- sem(model, data = PoliticalDemocracy)
# Standard errors here are larger than the corrected 2S-PA ones
(vc_j <-
    vcov(sem_fit)[c("dem60~ind60", "ind60~~ind60", "dem60~~dem60"),
                  c("dem60~ind60", "ind60~~ind60", "dem60~~dem60")])
#> dem60~ind60 ind60~~ind60 dem60~~dem60
#> dem60~ind60 0.143355743 -0.0033833569 0.0648673299
#> ind60~~ind60 -0.003383357 0.0075268147 -0.0001098306
#> dem60~~dem60 0.064867330 -0.0001098306 0.7655699561
sqrt(diag(vc_j))
#> dem60~ind60 ind60~~ind60 dem60~~dem60
#> 0.37862348 0.08675722 0.87496855
Bootstrap Standard Errors
run_tspa <- function(df, inds) {
    fs_ind60 <- get_fs(data = df[inds, ], model = "ind60 =~ x1 + x2 + x3",
                       se = "none", test = "none")
    fs_dem60 <- get_fs(data = df[inds, ], model = "dem60 =~ y1 + y2 + y3 + y4",
                       se = "none", test = "none")
    fs_dat <- cbind(fs_ind60, fs_dem60)
    # Assemble loadings
    ld <- block_diag(attr(fs_ind60, which = "fsL"),
                     attr(fs_dem60, which = "fsL"))
    # Assemble error covariances of the factor scores
    ev <- block_diag(attr(fs_ind60, which = "fsT"),
                     attr(fs_dem60, which = "fsT"))
    tspa_fit <- tspa(model = "dem60 ~ ind60",
                     data = fs_dat,
                     fsL = ld,
                     fsT = ev,
                     test = "none")
    coef(tspa_fit)
}
boo <- boot(PoliticalDemocracy, statistic = run_tspa, R = 1999)
# Use the MAD to downweight outlying replications
boo$t |>
    apply(MARGIN = 2, FUN = mad) |>
    setNames(c("dem60~ind60", "ind60~~ind60", "dem60~~dem60"))
#> dem60~ind60 ind60~~ind60 dem60~~dem60
#> 0.32521386 0.07271009 0.87676911
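For reference, the conventional bootstrap standard deviations, which are more affected by outlying replications, can be obtained the same way with sd.
# Conventional bootstrap standard errors
boo$t |>
    apply(MARGIN = 2, FUN = sd) |>
    setNames(c("dem60~ind60", "ind60~~ind60", "dem60~~dem60"))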
The standard errors seem to diverge slightly among the methods. SE(dem60~ind60) is lowest with the bootstrap. SE(ind60~~ind60) is noticeably higher with the joint model than with the other two methods. SE(dem60~~dem60) is lowest with the corrected 2S-PA. It should be pointed out that the joint model and 2S-PA are different estimators. The three sets of standard errors are collected below for easier comparison.
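The following is a small convenience sketch that gathers the corrected 2S-PA, joint SEM, and bootstrap (MAD) standard errors into one table, using the objects computed above.
# Corrected 2S-PA, joint SEM, and bootstrap (MAD) standard errors
rbind(`2S-PA (corrected)` = sqrt(diag(vc_cor)),
      `joint SEM` = sqrt(diag(vc_j)),
      `bootstrap (MAD)` = apply(boo$t, MARGIN = 2, FUN = mad))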
Joint Measurement Model
cfa_joint <- cfa("ind60 =~ x1 + x2 + x3
                  dem60 =~ y1 + y2 + y3 + y4",
                 data = PoliticalDemocracy)
# Factor score
fs_joint <- get_fs_lavaan(cfa_joint, vfsLT = TRUE)
# Delta method variance
vldev_joint <- attr(fs_joint, which = "vfsLT")
tspa_fit2 <- tspa(model = "dem60 ~ ind60",
                  data = fs_joint,
                  fsT = attr(fs_joint, "fsT"),
                  fsL = attr(fs_joint, "fsL"))
# Unadjusted covariance
vcov(tspa_fit2)
#> d60~60 i60~~6 d60~~6
#> dem60~ind60 0.124
#> ind60~~ind60 -0.001 0.006
#> dem60~~dem60 -0.006 0.000 0.430
# Adjusted covariance
(vc2_cor <- vcov_corrected(tspa_fit2, vfsLT = vldev_joint))
#> d60~60 i60~~6 d60~~6
#> dem60~ind60 0.130
#> ind60~~ind60 -0.001 0.006
#> dem60~~dem60 -0.009 0.000 0.493
# Corrected standard errors
sqrt(diag(vc2_cor))
#> dem60~ind60 ind60~~ind60 dem60~~dem60
#> 0.36049260 0.07654824 0.70229676
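The corrected standard errors based on the joint measurement model can be placed next to those from the separate measurement models; this is a convenience sketch using vc_cor and vc2_cor from above.
# Corrected SEs: separate vs. joint first-stage measurement models
rbind(separate = sqrt(diag(vc_cor)),
      joint = sqrt(diag(vc2_cor)))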
Bootstrap Standard Errors
run_tspa2 <- function(df, inds) {
    fs_joint <- get_fs(df[inds, ],
                       model = "ind60 =~ x1 + x2 + x3
                                dem60 =~ y1 + y2 + y3 + y4",
                       se = "none", test = "none")
    tspa_fit2 <- tspa(model = "dem60 ~ ind60",
                      data = fs_joint,
                      fsT = attr(fs_joint, "fsT"),
                      fsL = attr(fs_joint, "fsL"),
                      test = "none")
    coef(tspa_fit2)
}
boo2 <- boot(PoliticalDemocracy, statistic = run_tspa2, R = 1999)
# run_tspa2b <- function(df, inds) {
# fsb_joint <- get_fs(df[inds, ],
# model = "ind60 =~ x1 + x2 + x3
# dem60 =~ y1 + y2 + y3 + y4",
# se = "none", test = "none", method = "Bartlett")
# tspa_fit2b <- tspa(model = "dem60 ~ ind60",
# data = fsb_joint,
# fsT = attr(fsb_joint, "fsT"),
# fsL = diag(2),
# test = "none")
# coef(tspa_fit2b)
# }
# boo2b <- boot(PoliticalDemocracy, statistic = run_tspa2b, R = 4999)
# run_sem2 <- function(df, inds) {
# sem_fit <- sem(model, data = df[inds, ])
# coef(sem_fit)[c(6, 14, 15)]
# }
# boo2j <- boot(PoliticalDemocracy, statistic = run_sem2, R = 4999)
# Use the MAD to downweight outlying replications
boo2$t |>
    apply(MARGIN = 2, FUN = mad) |>
    setNames(c("dem60~ind60", "ind60~~ind60", "dem60~~dem60"))
#> dem60~ind60 ind60~~ind60 dem60~~dem60
#> 0.34364943 0.07851415 0.88168143
With Bartlett’s Method
# Bartlett factor scores
fsb_joint <- get_fs(PoliticalDemocracy,
                    model = "ind60 =~ x1 + x2 + x3
                             dem60 =~ y1 + y2 + y3 + y4",
                    method = "Bartlett",
                    vfsLT = TRUE)
vldevb_joint <- attr(fsb_joint, which = "vfsLT")
tspa_fit2b <- tspa(model = "dem60 ~ ind60",
                   data = fsb_joint,
                   fsT = attr(fsb_joint, "fsT"),
                   # Bartlett scores are conditionally unbiased, so the
                   # loading matrix is an identity matrix
                   fsL = diag(2) |>
                       `dimnames<-`(list(c("fs_ind60", "fs_dem60"),
                                         c("ind60", "dem60"))))
# Unadjusted covariance matrix
vcov(tspa_fit2b)
#> d60~60 i60~~6 d60~~6
#> dem60~ind60 0.124
#> ind60~~ind60 -0.001 0.006
#> dem60~~dem60 -0.006 0.000 0.430
# Adjusted covariance matrix
(vc2b_cor <- vcov_corrected(
    tspa_fit2b,
    # Keep only the sampling variances of the error variances
    # (the loadings are fixed with Bartlett scores)
    vfsLT = vldevb_joint[c(5, 7), c(5, 7)],
    # Specify which elements are free (error variances only)
    which_free = c(5, 7)))
#> d60~60 i60~~6 d60~~6
#> dem60~ind60 0.124
#> ind60~~ind60 -0.001 0.006
#> dem60~~dem60 -0.006 0.000 0.448
# Corrected standard errors
sqrt(diag(vc2b_cor))
#> dem60~ind60 ind60~~ind60 dem60~~dem60
#> 0.35271752 0.07634634 0.66959806
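For comparison, the corrected standard errors with regression versus Bartlett factor scores (both based on the joint measurement model) can be lined up as follows.
# Corrected SEs: regression vs. Bartlett factor scores
rbind(regression = sqrt(diag(vc2_cor)),
      Bartlett = sqrt(diag(vc2b_cor)))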
Multiple Groups
# Multigroup, three-factor example
mod <- "
  # latent variables
    visual =~ x1 + x2 + x3
    textual =~ x4 + x5 + x6
    speed =~ x7 + x8 + x9
"
# Factor scores based on partial invariance
fs_dat <- get_fs(HolzingerSwineford1939, model = mod, std.lv = TRUE,
                 group = "school",
                 group.equal = c("loadings", "intercepts"),
                 group.partial = c("visual=~x2", "x7~1"))
# Alternatively, fit the CFA first and extract the scores with get_fs_lavaan()
fit <- cfa(mod,
           data = HolzingerSwineford1939,
           std.lv = TRUE,
           group = "school",
           group.equal = c("loadings", "intercepts"),
           group.partial = c("visual=~x2", "x7~1"))
fs_dat <- get_fs_lavaan(fit)
# Sampling covariance of the loadings and error variances (internal helper)
vldev <- R2spa:::vcov_ld_evfs(fit)
tspa_fit <- tspa(model = "visual ~~ textual + speed
                          textual ~~ speed",
                 data = fs_dat,
                 group = "school",
                 fsL = attr(fs_dat, which = "fsL"),
                 fsT = attr(fs_dat, which = "fsT"))
# Unadjusted covariance
vcov(tspa_fit)[c(1:6, 10:15), c(1:6, 10:15)]
#> visual~~textual visual~~speed textual~~speed visual~~visual
#> visual~~textual 0.012032756 0.004143016 0.003562702 0.009059744
#> visual~~speed 0.004143016 0.015289623 0.005749677 0.006286186
#> textual~~speed 0.003562702 0.005749677 0.012308431 0.002169626
#> visual~~visual 0.009059744 0.006286186 0.002169626 0.026249333
#> textual~~textual 0.007226793 0.002111028 0.004878946 0.003126897
#> speed~~speed 0.001464756 0.006962597 0.006774549 0.001505415
#> visual~~textual.g2 0.000000000 0.000000000 0.000000000 0.000000000
#> visual~~speed.g2 0.000000000 0.000000000 0.000000000 0.000000000
#> textual~~speed.g2 0.000000000 0.000000000 0.000000000 0.000000000
#> visual~~visual.g2 0.000000000 0.000000000 0.000000000 0.000000000
#> textual~~textual.g2 0.000000000 0.000000000 0.000000000 0.000000000
#> speed~~speed.g2 0.000000000 0.000000000 0.000000000 0.000000000
#> textual~~textual speed~~speed visual~~textual.g2
#> visual~~textual 0.007226793 0.001464756 0.000000000
#> visual~~speed 0.002111028 0.006962597 0.000000000
#> textual~~speed 0.004878946 0.006774549 0.000000000
#> visual~~visual 0.003126897 0.001505415 0.000000000
#> textual~~textual 0.016702352 0.001425195 0.000000000
#> speed~~speed 0.001425195 0.032202255 0.000000000
#> visual~~textual.g2 0.000000000 0.000000000 0.011424159
#> visual~~speed.g2 0.000000000 0.000000000 0.005834597
#> textual~~speed.g2 0.000000000 0.000000000 0.006283615
#> visual~~visual.g2 0.000000000 0.000000000 0.008537899
#> textual~~textual.g2 0.000000000 0.000000000 0.007689097
#> speed~~speed.g2 0.000000000 0.000000000 0.003681750
#> visual~~speed.g2 textual~~speed.g2 visual~~visual.g2
#> visual~~textual 0.000000000 0.000000000 0.000000000
#> visual~~speed 0.000000000 0.000000000 0.000000000
#> textual~~speed 0.000000000 0.000000000 0.000000000
#> visual~~visual 0.000000000 0.000000000 0.000000000
#> textual~~textual 0.000000000 0.000000000 0.000000000
#> speed~~speed 0.000000000 0.000000000 0.000000000
#> visual~~textual.g2 0.005834597 0.006283615 0.008537899
#> visual~~speed.g2 0.020016361 0.008699815 0.010689107
#> textual~~speed.g2 0.008699815 0.016930572 0.004219633
#> visual~~visual.g2 0.010689107 0.004219633 0.021628068
#> textual~~textual.g2 0.002940790 0.006708957 0.003370422
#> speed~~speed.g2 0.017174234 0.011969242 0.005282811
#> textual~~textual.g2 speed~~speed.g2
#> visual~~textual 0.000000000 0.000000000
#> visual~~speed 0.000000000 0.000000000
#> textual~~speed 0.000000000 0.000000000
#> visual~~visual 0.000000000 0.000000000
#> textual~~textual 0.000000000 0.000000000
#> speed~~speed 0.000000000 0.000000000
#> visual~~textual.g2 0.007689097 0.003681750
#> visual~~speed.g2 0.002940790 0.017174234
#> textual~~speed.g2 0.006708957 0.011969242
#> visual~~visual.g2 0.003370422 0.005282811
#> textual~~textual.g2 0.017541486 0.002565923
#> speed~~speed.g2 0.002565923 0.055832832
# Adjusted covariance
vcov_corrected(tspa_fit, vfsLT = vldev)[c(1:6, 10:15), c(1:6, 10:15)]
#> visual~~textual visual~~speed textual~~speed visual~~visual
#> visual~~textual 1.766770e-02 3.948995e-03 1.680095e-03 0.0032658891
#> visual~~speed 3.948995e-03 2.625930e-02 6.180231e-03 -0.0002941259
#> textual~~speed 1.680095e-03 6.180231e-03 2.088330e-02 0.0034032016
#> visual~~visual 3.265889e-03 -2.941259e-04 3.403202e-03 0.0552337485
#> textual~~textual 6.343722e-03 3.471197e-03 4.310592e-03 0.0022325897
#> speed~~speed 2.319057e-03 -2.462920e-04 2.977648e-03 0.0039800365
#> visual~~textual.g2 2.894521e-04 -1.727969e-05 -1.158344e-04 -0.0036750998
#> visual~~speed.g2 -7.758231e-05 4.463544e-04 -3.159286e-04 -0.0017771299
#> textual~~speed.g2 -1.395251e-04 -2.939691e-05 1.074132e-04 0.0021179697
#> visual~~visual.g2 6.773687e-04 9.416669e-05 -5.286419e-04 -0.0139631267
#> textual~~textual.g2 -8.773851e-05 1.870183e-04 -5.236929e-05 0.0001038840
#> speed~~speed.g2 -3.614222e-04 2.154682e-04 1.739917e-04 0.0030238026
#> textual~~textual speed~~speed visual~~textual.g2
#> visual~~textual 6.343722e-03 2.319057e-03 2.894521e-04
#> visual~~speed 3.471197e-03 -2.462920e-04 -1.727969e-05
#> textual~~speed 4.310592e-03 2.977648e-03 -1.158344e-04
#> visual~~visual 2.232590e-03 3.980036e-03 -3.675100e-03
#> textual~~textual 1.943788e-02 -2.260608e-05 3.559274e-04
#> speed~~speed -2.260608e-05 5.904050e-02 1.853678e-03
#> visual~~textual.g2 3.559274e-04 1.853678e-03 2.079770e-02
#> visual~~speed.g2 -8.758319e-04 3.799116e-03 7.128677e-03
#> textual~~speed.g2 -2.295514e-05 -2.136909e-03 2.631124e-03
#> visual~~visual.g2 1.246137e-03 2.827516e-03 1.425193e-02
#> textual~~textual.g2 -2.028541e-04 -1.070262e-03 5.734502e-03
#> speed~~speed.g2 7.282334e-04 -1.578688e-02 3.266464e-04
#> visual~~speed.g2 textual~~speed.g2 visual~~visual.g2
#> visual~~textual -7.758231e-05 -1.395251e-04 6.773687e-04
#> visual~~speed 4.463544e-04 -2.939691e-05 9.416669e-05
#> textual~~speed -3.159286e-04 1.074132e-04 -5.286419e-04
#> visual~~visual -1.777130e-03 2.117970e-03 -1.396313e-02
#> textual~~textual -8.758319e-04 -2.295514e-05 1.246137e-03
#> speed~~speed 3.799116e-03 -2.136909e-03 2.827516e-03
#> visual~~textual.g2 7.128677e-03 2.631124e-03 1.425193e-02
#> visual~~speed.g2 3.246016e-02 6.793932e-03 8.478633e-03
#> textual~~speed.g2 6.793932e-03 2.230005e-02 -7.599378e-04
#> visual~~visual.g2 8.478633e-03 -7.599378e-04 1.079909e-01
#> textual~~textual.g2 4.641147e-03 6.987265e-03 4.758042e-04
#> speed~~speed.g2 6.636008e-03 1.596428e-02 -3.656654e-03
#> textual~~textual.g2 speed~~speed.g2
#> visual~~textual -8.773851e-05 -0.0003614222
#> visual~~speed 1.870183e-04 0.0002154682
#> textual~~speed -5.236929e-05 0.0001739917
#> visual~~visual 1.038840e-04 0.0030238026
#> textual~~textual -2.028541e-04 0.0007282334
#> speed~~speed -1.070262e-03 -0.0157868803
#> visual~~textual.g2 5.734502e-03 0.0003266464
#> visual~~speed.g2 4.641147e-03 0.0066360081
#> textual~~speed.g2 6.987265e-03 0.0159642788
#> visual~~visual.g2 4.758042e-04 -0.0036566537
#> textual~~textual.g2 2.062776e-02 0.0018416930
#> speed~~speed.g2 1.841693e-03 0.1076812729
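As in the single-group examples, corrected standard errors are the square roots of the diagonal of the adjusted covariance matrix; the snippet below shows them for the latent (co)variance parameters printed above.
# Corrected standard errors for the latent (co)variances in both groups
vc_mg_cor <- vcov_corrected(tspa_fit, vfsLT = vldev)
sqrt(diag(vc_mg_cor))[c(1:6, 10:15)]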