Using Two-Stage Path Analysis with Definition Variables for Latent Interactions with Categorical Indicators
Mark Lai
2024-09-06
categorical-interaction.Rmd
library(mirt) # for IRT
#> Loading required package: stats4
#> Loading required package: lattice
library(OpenMx)
#> To take full advantage of multiple cores, use:
#> mxOption(key='Number of Threads', value=parallel::detectCores()) #now
#> Sys.setenv(OMP_NUM_THREADS=parallel::detectCores()) #before library(OpenMx)
library(umx)
#> For an overview type '?umx'
#>
#> Attaching package: 'umx'
#> The following object is masked from 'package:stats':
#>
#> loadings
library(R2spa)
set.seed(591028)
Simulate Data
#' Population parameters
num_obs <- 5000 # relatively large sample
cov_xm <- .45
betas <- c(.2, .3, -.4)
# Results seem more accurate with more items
lambdax <- 1.7 * c(.6, .7, .8, .5, .5, .3, .9, .7, .6) # on normal ogive metric
lambdam <- 1.7 * c(.8, .7, .7, .6, .5) # on normal ogive metric
lambday <- c(.5, .7, .9)
thresx <- matrix(c(1, 1.5, 1.3, 0.5, 2, -0.5, -1, 0.5, 0.8), nrow = 1) # binary
thresm <- matrix(c(-2, -2, -0.4, -0.5, 0,
-1.5, -0.5, 0, 0, 0.5,
-1, 1, 1.5, 0.8, 1), nrow = 3,
byrow = TRUE) # ordinal with 4 categories
inty <- 1.5 # intercept of y
# error variance of y
r2y <- crossprod(
betas,
matrix(c(1, cov_xm, 0, cov_xm, 1, 0, 0, 0, 1 + cov_xm^2), nrow = 3)
) %*% betas
evar <- as.numeric(1 - r2y)
#' Simulate latent variables X and M with variances of 1, and the disturbance of
#' Y
cov_xm_ey <- matrix(c(1, cov_xm, 0,
cov_xm, 1, 0,
0, 0, evar), nrow = 3)
eta <- MASS::mvrnorm(num_obs, mu = rep(0, 3), Sigma = cov_xm_ey,
empirical = TRUE)
# Add product term
eta <- cbind(eta, eta[, 1] * eta[, 2])
#' Compute latent y (standardized)
etay <- inty + eta[, -3] %*% betas + eta[, 3]
# Verify the structural coefficients with eta known
lm(etay ~ eta[, -3])
#>
#> Call:
#> lm(formula = etay ~ eta[, -3])
#>
#> Coefficients:
#> (Intercept) eta[, -3]1 eta[, -3]2 eta[, -3]3
#> 1.4938 0.2002 0.2999 -0.3862
#' Simulate y (continuous)
y <- t(
lambday %*% t(etay) +
rnorm(num_obs * length(lambday), sd = sqrt(1 - lambday^2))
)
#' Simulate latent continuous responses for X and M
xstar <- eta[, 1] %*% t(lambdax) + rnorm(num_obs * length(lambdax))
mstar <- eta[, 2] %*% t(lambdam) + rnorm(num_obs * length(lambdam))
#' Obtain categorical items
x <- vapply(
seq_along(lambdax),
FUN = \(i) {
findInterval(xstar[, i], thresx[, i])
},
FUN.VALUE = numeric(num_obs))
m <- vapply(
seq_along(lambdam),
FUN = \(i) {
findInterval(mstar[, i], thresm[, i])
},
FUN.VALUE = numeric(num_obs))
Compute Factor Scores
Y
fsy <- get_fs(data = y, std.lv = TRUE)
X
In this example, we use EAP scores, which are conceptually similar to the regression scores for continuous data.
irtx <- mirt(data.frame(x), itemtype = "2PL",
verbose = FALSE) # IRT (2-PL) for x
marginal_rxx(irtx) # marginal reliability
#> [1] 0.7606752
fsx <- fscores(irtx, full.scores.SE = TRUE)
empirical_rxx(fsx) # empirical reliability
#> F1
#> 0.7732511
M
irtm <- mirt(data.frame(m), itemtype = "graded",
verbose = FALSE) # IRT (GRM) for m
marginal_rxx(irtm) # marginal reliability
#> [1] 0.7931549
fsm <- fscores(irtm, full.scores.SE = TRUE)
empirical_rxx(fsm) # empirical reliability
#> F1
#> 0.7942367
Loading and Error Variances for Product of Factor scores
Let and be factor scores of the predictor and the moderator. Then
So the loading is , and the error variance for the product indicator is
Directly Using Factor Scores
# Assemble data
fs_dat <- data.frame(fsy[c(1, 3, 4)], fsx, fsm) |>
setNames(c(
"fsy", "rel_fsy", "ev_fsy",
"fsx", "se_fsx", "fsm", "se_fsm"
)) |>
# Compute reliability; only needed for 2S-PA
within(expr = {
rel_fsx <- 1 - se_fsx^2
rel_fsm <- 1 - se_fsm^2
ev_fsx <- se_fsx^2 * (1 - se_fsx^2)
ev_fsm <- se_fsm^2 * (1 - se_fsm^2)
}) |>
# Add interaction
within(expr = {
fsxm <- fsx * fsm
ld_fsxm <- rel_fsx * rel_fsm
ev_fsxm <- rel_fsx^2 * ev_fsm + rel_fsm^2 * ev_fsx + ev_fsm * ev_fsx
})
lm(fsy ~ fsx * fsm, data = fs_dat) # some bias
#>
#> Call:
#> lm(formula = fsy ~ fsx * fsm, data = fs_dat)
#>
#> Coefficients:
#> (Intercept) fsx fsm fsx:fsm
#> 0.09867 0.19539 0.25989 -0.35788
Two-Stage Path Analysis
- Create OpenMx model without latent variables
fsreg_mx <- mxModel("str",
type = "RAM",
manifestVars = c("fsy", "fsx", "fsm", "fsxm"),
# Path coefficients
mxPath(
from = c("fsx", "fsm", "fsxm"), to = "fsy",
values = 0
),
# Variances
mxPath(
from = c("fsy", "fsx", "fsm", "fsxm"),
to = c("fsy", "fsx", "fsm", "fsxm"),
arrows = 2,
values = c(0.6, 1, 1, 1)
),
# Covariances
mxPath(
from = c("fsx", "fsm", "fsxm"),
to = c("fsx", "fsm", "fsxm"),
arrows = 2, connect = "unique.pairs",
values = 0
),
mxPath(
from = "one", to = c("fsy", "fsx", "fsm", "fsxm"),
values = 0
)
)
fsreg_umx <- umxLav2RAM(
"
fsy ~ b1 * fsx + b2 * fsm + b3 * fsxm
fsy + fsx + fsm + fsxm ~ 1
fsx ~~ vx * fsx + fsm + fsxm
fsm ~~ vm * fsm + fsxm
",
printTab = FALSE)
#>
#> ?plot.MxModel options: std, means, digits, strip_zero, file, splines=T/F/ortho,..., min=, max =, same = , fixed, resid= 'circle|line|none'
DiagrammeR::grViz(omxGraphviz(fsreg_mx))
#> digraph "str" {
#> node [style=filled, fontname="Arial", fontsize=16];
#> /* Manifest Variables */
#> { rank = max; fsy; fsx; fsm; fsxm }
#> fsy [shape=square, fillcolor="#a9fab1", height=0.5, width=0.5];
#> fsx [shape=square, fillcolor="#a9fab1", height=0.5, width=0.5];
#> fsm [shape=square, fillcolor="#a9fab1", height=0.5, width=0.5];
#> fsxm [shape=square, fillcolor="#a9fab1", height=0.5, width=0.5];
#> /* Means */
#> one [shape=triangle];
#> /* Paths */
#> fsx -> fsy[dir=forward];
#> fsm -> fsy[dir=forward];
#> fsxm -> fsy[dir=forward];
#> fsy -> fsy[dir=both, headport=s, tailport=s];
#> fsx -> fsx[dir=both, headport=s, tailport=s];
#> fsx -> fsm[dir=both];
#> fsx -> fsxm[dir=both];
#> fsm -> fsm[dir=both, headport=s, tailport=s];
#> fsm -> fsxm[dir=both];
#> fsxm -> fsxm[dir=both, headport=s, tailport=s];
#> one -> fsy[dir=forward];
#> one -> fsx[dir=forward];
#> one -> fsm[dir=forward];
#> one -> fsxm[dir=forward];
#> }
- Create loading and error covariance matrices
# Loading
matL <- mxMatrix(
type = "Diag", nrow = 4, ncol = 4,
free = FALSE,
labels = c("data.rel_fsy", "data.rel_fsx", "data.rel_fsm", "data.ld_fsxm"),
name = "L"
)
# Error
matE <- mxMatrix(
type = "Diag", nrow = 4, ncol = 4,
free = FALSE,
labels = c("data.ev_fsy", "data.ev_fsx", "data.ev_fsm", "data.ev_fsxm"),
name = "E"
)
- Combine (1) and (2), and run 2S-PA
tspa_mx <-
tspa_mx_model(
mxModel(fsreg_umx,
mxAlgebra(b1 * sqrt(vx), name = "stdx_b1"),
mxAlgebra(b2 * sqrt(vm), name = "stdx_b2"),
mxAlgebra(b3 * sqrt(vx) * sqrt(vm), name = "stdx_b3"),
mxCI(c("stdx_b1", "stdx_b2", "stdx_b3"))),
data = fs_dat,
mat_ld = matL, mat_ev = matE)
# Run OpenMx
tspa_mx_fit <- mxRun(tspa_mx, intervals = TRUE)
#> Running 2SPAD with 14 parameters
# Summarize the results (takes a minute or so)
# Also include the X-Standardized coefficients
summary(tspa_mx_fit)
#> Summary of 2SPAD
#>
#> free parameters:
#> name matrix row col Estimate Std.Error A
#> 1 b1 m1.A fsy fsx 2.064624e-01 0.01897723
#> 2 b2 m1.A fsy fsm 2.989998e-01 0.01870619
#> 3 b3 m1.A fsy fsxm -4.152434e-01 0.01869847
#> 4 fsy_with_fsy m1.S fsy fsy 6.398847e-01 0.01956885
#> 5 vx m1.S fsx fsx 9.991969e-01 0.02776582
#> 6 fsm_with_fsx m1.S fsx fsm 4.455208e-01 0.01927455
#> 7 vm m1.S fsm fsm 9.996933e-01 0.02685778
#> 8 fsx_with_fsxm m1.S fsx fsxm 2.425816e-02 0.02230417
#> 9 fsm_with_fsxm m1.S fsm fsxm 4.883996e-03 0.02200107
#> 10 fsxm_with_fsxm m1.S fsxm fsxm 1.035316e+00 0.03835927
#> 11 one_to_fsy m1.M 1 fsy 1.871730e-01 0.01612887
#> 12 one_to_fsx m1.M 1 fsx -1.368772e-04 0.01615666
#> 13 one_to_fsm m1.M 1 fsm -6.265561e-05 0.01587288
#> 14 one_to_fsxm m1.M 1 fsxm 4.506424e-01 0.01832363
#>
#> confidence intervals:
#> lbound estimate ubound note
#> m1.stdx_b1[1,1] 0.1693079 0.2063795 0.2436209
#> m1.stdx_b2[1,1] 0.2622028 0.2989540 0.3356675
#> m1.stdx_b3[1,1] -0.4563615 -0.4150129 -0.3757701
#>
#> Model Statistics:
#> | Parameters | Degrees of Freedom | Fit (-2lnL units)
#> Model: 14 39986 50641.35
#> Saturated: NA NA NA
#> Independence: NA NA NA
#> Number of observations/statistics: 5000/40000
#>
#> Information Criteria:
#> | df Penalty | Parameters Penalty | Sample-Size Adjusted
#> AIC: -29330.65 50669.35 50669.43
#> BIC: -289927.14 50760.59 50716.10
#> CFI: NA
#> TLI: 1 (also known as NNFI)
#> RMSEA: 0 [95% CI (NA, NA)]
#> Prob(RMSEA <= 0.05): NA
#> To get additional fit indices, see help(mxRefModels)
#> timestamp: 2024-09-06 04:18:38
#> Wall clock time: 170.3362 secs
#> optimizer: SLSQP
#> OpenMx version number: 2.21.12
#> Need help? See help(mxSummary)
# Standard errors for X-Standardized coefficients
mxSE(m1.stdx_b1, tspa_mx_fit)
#> Treating first argument as an expression
#> [,1]
#> [1,] 0.01891419
mxSE(m1.stdx_b2, tspa_mx_fit)
#> Treating first argument as an expression
#> [,1]
#> [1,] 0.01868014
mxSE(m1.stdx_b3, tspa_mx_fit)
#> Treating first argument as an expression
#> [,1]
#> [1,] 0.02048697