2S-PA with Missing Data
Mark Lai
2024-09-06
missing-data.Rmd
Single-Factor Model
For illustration, we will create an artificial data set with two missing data pattern:
set.seed(2225)
library(lavaan)
#> This is lavaan 0.6-18
#> lavaan is FREE software! Please report any bugs.
library(R2spa)
library(umx)
#> Loading required package: OpenMx
#> To take full advantage of multiple cores, use:
#> mxOption(key='Number of Threads', value=parallel::detectCores()) #now
#> Sys.setenv(OMP_NUM_THREADS=parallel::detectCores()) #before library(OpenMx)
#> For an overview type '?umx'
#>
#> Attaching package: 'umx'
#> The following object is masked from 'package:stats':
#>
#> loadings
data(PoliticalDemocracy)
pd2 <- PoliticalDemocracy
# Add MCAR missing data
pd2[!rbinom(75, size = 1, prob = 0.7), 9] <- NA
Factor scores with lavaan::lavPredict()
:
fit <- cfa("ind60 =~ x1 + x2 + x3", data = pd2, missing = "fiml")
fs_lavaan <- lavPredict(fit, method = "Bartlett", se = TRUE, fsm = TRUE)
# # extract scoring matrix
# fsm <- attr(fs_lavaan, "fsm")[[1]]
# # se for observations with complete data
# th <- lavInspect(fit, what = "est")$theta
# sqrt(diag(fsm %*% th %*% t(fsm)))
# # se for observations missing item 1 (need conditional normal)
# fsm_mis1 <- with(lavInspect(fit, what = "est"), {
# lambda <- lambda[2:3, , drop = FALSE]
# theta <- theta[2:3, 2:3, drop = FALSE]
# # covy <- (lambda %*% psi %*% t(lambda) + theta)
# # ginvcovy <- MASS::ginv(covy)
# # tlam_invcov <- crossprod(lambda, ginvcovy)
# # psi %*% tlam_invcov
# lamt_th_lam <- crossprod(lambda, solve(theta, lambda))
# solve(lamt_th_lam, crossprod(lambda, solve(theta)))
# })
# sqrt(diag(fsm_mis1 %*% th[2:3, 2:3, drop = FALSE] %*% t(fsm_mis1)))
# From R2spa
fs <- R2spa::get_fs_lavaan(fit, method = "Bartlett")
# Compare factor scores
plot(fs_lavaan, fs$fs_ind60)
abline(a = 0, b = 1)
Multiple group
mg_fit <- cfa("ind60 =~ x1 + x2 + x3",
data = cbind(pd2, group = rep(1:2, c(40, 35))),
missing = "fiml",
group = "group"
)
R2spa::get_fs_lavaan(mg_fit)
#> $`1`
#> fs_ind60 fs_ind60_se ind60_by_fs_ind60 ev_fs_ind60 group
#> 1 -0.629897437 0.06327033 0.9907480 0.004003135 1
#> 2 -0.003597801 0.06481192 0.9902871 0.004200585 1
#> 3 0.531621454 0.06327033 0.9907480 0.004003135 1
#> 4 1.093150997 0.06481192 0.9902871 0.004200585 1
#> 5 0.751322856 0.06481192 0.9902871 0.004200585 1
#> 6 0.032310335 0.06481192 0.9902871 0.004200585 1
#> 7 -0.001194933 0.06327033 0.9907480 0.004003135 1
#> 8 -0.081265180 0.06481192 0.9902871 0.004200585 1
#> 9 0.087671139 0.06327033 0.9907480 0.004003135 1
#> 10 0.158890547 0.06327033 0.9907480 0.004003135 1
#> 11 0.575151483 0.06327033 0.9907480 0.004003135 1
#> 12 0.490722985 0.06327033 0.9907480 0.004003135 1
#> 13 1.199738770 0.06481192 0.9902871 0.004200585 1
#> 14 0.080689638 0.06481192 0.9902871 0.004200585 1
#> 15 0.479293472 0.06327033 0.9907480 0.004003135 1
#> 16 0.304725162 0.06327033 0.9907480 0.004003135 1
#> 17 -0.006880589 0.06481192 0.9902871 0.004200585 1
#> 18 -0.232201592 0.06327033 0.9907480 0.004003135 1
#> 19 0.734594167 0.06481192 0.9902871 0.004200585 1
#> 20 0.871472984 0.06327033 0.9907480 0.004003135 1
#> 21 0.749217422 0.06327033 0.9907480 0.004003135 1
#> 22 0.726471949 0.06327033 0.9907480 0.004003135 1
#> 23 0.397941787 0.06481192 0.9902871 0.004200585 1
#> 24 0.446676514 0.06481192 0.9902871 0.004200585 1
#> 25 0.726400152 0.06327033 0.9907480 0.004003135 1
#> 26 -1.023191030 0.06481192 0.9902871 0.004200585 1
#> 27 -0.229774171 0.06327033 0.9907480 0.004003135 1
#> 28 -0.589883012 0.06481192 0.9902871 0.004200585 1
#> 29 -0.944983987 0.06327033 0.9907480 0.004003135 1
#> 30 -1.473429633 0.06327033 0.9907480 0.004003135 1
#> 31 -0.386068258 0.06481192 0.9902871 0.004200585 1
#> 32 -1.339301841 0.06327033 0.9907480 0.004003135 1
#> 33 -0.282299077 0.06327033 0.9907480 0.004003135 1
#> 34 -0.197502508 0.06327033 0.9907480 0.004003135 1
#> 35 -0.419983839 0.06327033 0.9907480 0.004003135 1
#> 36 -0.737560746 0.06327033 0.9907480 0.004003135 1
#> 37 0.066032960 0.06327033 0.9907480 0.004003135 1
#> 38 -0.644149975 0.06327033 0.9907480 0.004003135 1
#> 39 -1.198883624 0.06481192 0.9902871 0.004200585 1
#> 40 -0.082046566 0.06327033 0.9907480 0.004003135 1
#>
#> $`2`
#> fs_ind60 fs_ind60_se ind60_by_fs_ind60 ev_fs_ind60 group
#> 1 0.11486330 0.132523 0.9432493 0.01756235 2
#> 2 -0.41073122 0.132523 0.9432493 0.01756235 2
#> 3 -0.15711924 0.132523 0.9432493 0.01756235 2
#> 4 -0.76084565 0.172422 0.8992304 0.02972934 2
#> 5 -0.88026683 0.132523 0.9432493 0.01756235 2
#> 6 -0.74845673 0.132523 0.9432493 0.01756235 2
#> 7 -0.85061057 0.132523 0.9432493 0.01756235 2
#> 8 -0.55025232 0.172422 0.8992304 0.02972934 2
#> 9 0.26138642 0.132523 0.9432493 0.01756235 2
#> 10 0.32087466 0.132523 0.9432493 0.01756235 2
#> 11 0.50410306 0.132523 0.9432493 0.01756235 2
#> 12 -0.30439462 0.132523 0.9432493 0.01756235 2
#> 13 0.57282757 0.132523 0.9432493 0.01756235 2
#> 14 0.21646719 0.172422 0.8992304 0.02972934 2
#> 15 0.56163448 0.132523 0.9432493 0.01756235 2
#> 16 0.30298279 0.172422 0.8992304 0.02972934 2
#> 17 0.50001516 0.172422 0.8992304 0.02972934 2
#> 18 0.74949815 0.172422 0.8992304 0.02972934 2
#> 19 0.62556020 0.172422 0.8992304 0.02972934 2
#> 20 1.22362501 0.172422 0.8992304 0.02972934 2
#> 21 0.28280329 0.132523 0.9432493 0.01756235 2
#> 22 -0.72202779 0.132523 0.9432493 0.01756235 2
#> 23 -0.53354639 0.172422 0.8992304 0.02972934 2
#> 24 0.02656067 0.132523 0.9432493 0.01756235 2
#> 25 -0.10062174 0.132523 0.9432493 0.01756235 2
#> 26 0.19661034 0.172422 0.8992304 0.02972934 2
#> 27 0.17401287 0.172422 0.8992304 0.02972934 2
#> 28 -0.85294266 0.132523 0.9432493 0.01756235 2
#> 29 -0.30843400 0.132523 0.9432493 0.01756235 2
#> 30 -0.49547393 0.172422 0.8992304 0.02972934 2
#> 31 -0.46151949 0.132523 0.9432493 0.01756235 2
#> 32 0.39305019 0.132523 0.9432493 0.01756235 2
#> 33 1.01756027 0.132523 0.9432493 0.01756235 2
#> 34 0.23274175 0.132523 0.9432493 0.01756235 2
#> 35 -0.13995593 0.132523 0.9432493 0.01756235 2
#>
#> attr(,"fsT")
#> attr(,"fsT")$`1`
#> fs_ind60
#> fs_ind60 0.004003135
#>
#> attr(,"fsT")$`2`
#> fs_ind60
#> fs_ind60 0.01756235
#>
#> attr(,"fsL")
#> attr(,"fsL")$`1`
#> ind60
#> fs_ind60 0.990748
#>
#> attr(,"fsL")$`2`
#> ind60
#> fs_ind60 0.9432493
#>
#> attr(,"fsb")
#> attr(,"fsb")$`1`
#> fs_ind60
#> 0
#>
#> attr(,"fsb")$`2`
#> fs_ind60
#> 0
#>
#> attr(,"scoring_matrix")
#> attr(,"scoring_matrix")$`1`
#> [,1] [,2] [,3]
#> ind60 0.04744877 0.4008488 0.01499426
#>
#> attr(,"scoring_matrix")$`2`
#> [,1] [,2] [,3]
#> ind60 0.4368268 0.1137155 0.1263754
Multiple-Factor Model
Regression scores
# Make last 10 cases completely missing on y5-y8
pd2[66:75, 5:8] <- NA
fit2 <- cfa("
dem60 =~ a * y1 + b * y2 + c * y3 + d * y4
dem65 =~ a * y5 + b * y6 + c * y7 + d * y8
y1 ~~ y5
y2 ~~ y6
y3 ~~ y7
y4 ~~ y8
", data = pd2, missing = "fiml")
fs2r <- lavPredict(fit2,
method = "EBM", se = TRUE,
acov = TRUE, fsm = TRUE
)
acov_r <- attr(fs2r, "acov")[[1]]
# Obtain tidy-ed factor scores data
fs_dat <- R2spa::augment_lav_predict(fit2)
# Run 2spa with OpenMx
# Build OpenMx model
fsreg_umx <- umxLav2RAM(
"
dem65 ~ dem60
dem65 + dem60 ~ 1
",
printTab = FALSE
)
#>
#> ?plot.MxModel options: std, means, digits, strip_zero, file, splines=T/F/ortho,..., min=, max =, same = , fixed, resid= 'circle|line|none'
cross_load <- matrix(c(
"dem60_by_fs_dem60", "dem60_by_fs_dem65",
"dem65_by_fs_dem60", "dem65_by_fs_dem65"
), nrow = 2) |>
`dimnames<-`(list(c("fs_dem60", "fs_dem65"), c("dem60", "dem65")))
err_cov <- matrix(c(
"ev_fs_dem60", "ecov_fs_dem60_fs_dem65",
"ecov_fs_dem60_fs_dem65", "ev_fs_dem65"
), nrow = 2) |>
`dimnames<-`(rep(list(c("fs_dem60", "fs_dem65")), 2))
# Need to make factor scores NA when loadings/error variances
# are inadmissible
fs_dat[66:75, "fs_dem65"] <- NA
tspa_mx <- tspa_mx_model(fsreg_umx,
data = fs_dat,
mat_ld = cross_load, mat_ev = err_cov
)
# Run OpenMx
tspa_mx_fit <- mxRun(tspa_mx)
#> Running 2SPAD with 5 parameters
# Summarize the results
summary(tspa_mx_fit)
#> Summary of 2SPAD
#>
#> free parameters:
#> name matrix row col Estimate Std.Error A
#> 1 dem60_to_dem65 m1.A dem65 dem60 9.125862e-01 0.07157358
#> 2 dem65_with_dem65 m1.S dem65 dem65 5.369808e-01 0.23101709
#> 3 dem60_with_dem60 m1.S dem60 dem60 4.559667e+00 0.84137910
#> 4 one_to_dem65 m1.M 1 dem65 1.424734e-08 0.14212140
#> 5 one_to_dem60 m1.M 1 dem60 -2.019359e-08 0.26210295
#>
#> Model Statistics:
#> | Parameters | Degrees of Freedom | Fit (-2lnL units)
#> Model: 5 275 404.9387
#> Saturated: NA NA NA
#> Independence: NA NA NA
#> Number of observations/statistics: 75/280
#>
#> Information Criteria:
#> | df Penalty | Parameters Penalty | Sample-Size Adjusted
#> AIC: -145.0613 414.9387 415.8083
#> BIC: -782.3705 426.5261 410.7675
#> CFI: NA
#> TLI: 1 (also known as NNFI)
#> RMSEA: 0 [95% CI (NA, NA)]
#> Prob(RMSEA <= 0.05): NA
#> To get additional fit indices, see help(mxRefModels)
#> timestamp: 2024-09-06 04:19:22
#> Wall clock time: 0.05901241 secs
#> optimizer: SLSQP
#> OpenMx version number: 2.21.12
#> Need help? See help(mxSummary)
Bartlett scores
# Obtain tidy-ed factor scores data
fsb_dat <- augment_lav_predict(fit2, method = "Bartlett")
#> Warning in sqrt(diag(fsT)): NaNs produced
tspab_mx <- tspa_mx_model(fsreg_umx,
data = fsb_dat,
mat_ld = attr(fsb_dat, which = "ld"),
mat_ev = attr(fsb_dat, which = "ev")
)
# Run OpenMx
tspab_mx_fit <- mxRun(tspab_mx)
#> Running 2SPAD with 5 parameters
# Summarize the results
summary(tspab_mx_fit)
#> Summary of 2SPAD
#>
#> free parameters:
#> name matrix row col Estimate Std.Error A
#> 1 dem60_to_dem65 m1.A dem65 dem60 9.125862e-01 0.07157513
#> 2 dem65_with_dem65 m1.S dem65 dem65 5.369808e-01 0.23103302
#> 3 dem60_with_dem60 m1.S dem60 dem60 4.559667e+00 0.84138189
#> 4 one_to_dem65 m1.M 1 dem65 1.424717e-08 0.14211902
#> 5 one_to_dem60 m1.M 1 dem60 -2.004115e-08 0.26209661
#>
#> Model Statistics:
#> | Parameters | Degrees of Freedom | Fit (-2lnL units)
#> Model: 5 275 536.4213
#> Saturated: NA NA NA
#> Independence: NA NA NA
#> Number of observations/statistics: 75/280
#>
#> Information Criteria:
#> | df Penalty | Parameters Penalty | Sample-Size Adjusted
#> AIC: -13.57873 546.4213 547.2908
#> BIC: -650.88796 558.0087 542.2500
#> CFI: NA
#> TLI: 1 (also known as NNFI)
#> RMSEA: 0 [95% CI (NA, NA)]
#> Prob(RMSEA <= 0.05): NA
#> To get additional fit indices, see help(mxRefModels)
#> timestamp: 2024-09-06 04:19:22
#> Wall clock time: 0.07445168 secs
#> optimizer: SLSQP
#> OpenMx version number: 2.21.12
#> Need help? See help(mxSummary)
Compared to Joint Model
jfit <- sem("
dem60 =~ a * y1 + b * y2 + c * y3 + d * y4
dem65 =~ a * y5 + b * y6 + c * y7 + d * y8
y1 ~~ y5
y2 ~~ y6
y3 ~~ y7
y4 ~~ y8
dem65 ~ dem60
", data = pd2, missing = "fiml")
summary(jfit)
#> lavaan 0.6-18 ended normally after 53 iterations
#>
#> Estimator ML
#> Optimization method NLMINB
#> Number of model parameters 29
#> Number of equality constraints 3
#>
#> Number of observations 75
#> Number of missing patterns 2
#>
#> Model Test User Model:
#>
#> Test statistic 27.499
#> Degrees of freedom 18
#> P-value (Chi-square) 0.070
#>
#> Parameter Estimates:
#>
#> Standard errors Standard
#> Information Observed
#> Observed information based on Hessian
#>
#> Latent Variables:
#> Estimate Std.Err z-value P(>|z|)
#> dem60 =~
#> y1 (a) 1.000
#> y2 (b) 1.337 0.153 8.738 0.000
#> y3 (c) 1.182 0.132 8.942 0.000
#> y4 (d) 1.334 0.136 9.777 0.000
#> dem65 =~
#> y5 (a) 1.000
#> y6 (b) 1.337 0.153 8.738 0.000
#> y7 (c) 1.182 0.132 8.942 0.000
#> y8 (d) 1.334 0.136 9.777 0.000
#>
#> Regressions:
#> Estimate Std.Err z-value P(>|z|)
#> dem65 ~
#> dem60 0.913 0.074 12.357 0.000
#>
#> Covariances:
#> Estimate Std.Err z-value P(>|z|)
#> .y1 ~~
#> .y5 0.627 0.395 1.587 0.113
#> .y2 ~~
#> .y6 1.238 0.765 1.618 0.106
#> .y3 ~~
#> .y7 1.534 0.695 2.207 0.027
#> .y4 ~~
#> .y8 0.200 0.551 0.363 0.716
#>
#> Intercepts:
#> Estimate Std.Err z-value P(>|z|)
#> .y1 5.465 0.298 18.348 0.000
#> .y2 4.256 0.442 9.629 0.000
#> .y3 6.563 0.397 16.534 0.000
#> .y4 4.453 0.379 11.733 0.000
#> .y5 5.181 0.313 16.561 0.000
#> .y6 2.782 0.403 6.899 0.000
#> .y7 6.249 0.367 17.043 0.000
#> .y8 3.996 0.383 10.420 0.000
#>
#> Variances:
#> Estimate Std.Err z-value P(>|z|)
#> .y1 2.093 0.490 4.276 0.000
#> .y2 6.502 1.252 5.195 0.000
#> .y3 5.443 1.038 5.242 0.000
#> .y4 2.689 0.716 3.756 0.000
#> .y5 2.528 0.551 4.588 0.000
#> .y6 3.677 0.827 4.446 0.000
#> .y7 3.386 0.760 4.453 0.000
#> .y8 2.628 0.702 3.745 0.000
#> dem60 4.560 1.038 4.394 0.000
#> .dem65 0.537 0.258 2.085 0.037
Multiple group
Just to demonstrate the syntax for obtaining the input data
mg_fit2 <- cfa("
dem60 =~ a * y1 + b * y2 + c * y3 + d * y4
dem65 =~ a * y5 + b * y6 + c * y7 + d * y8
y1 ~~ y5
y2 ~~ y6
y3 ~~ y7
y4 ~~ y8
",
data = cbind(pd2, group = rep(1:2, c(40, 35))),
missing = "fiml", group = "group"
)
#> Warning: lavaan->lavParTable():
#> using a single label per parameter in a multiple group setting implies
#> imposing equality constraints across all the groups; If this is not
#> intended, either remove the label(s), or use a vector of labels (one for
#> each group); See the Multiple groups section in the man page of
#> model.syntax.
augment_lav_predict(mg_fit2)