Skip to contents
library(mirt)  # for IRT
#> Loading required package: stats4
#> Loading required package: lattice
library(OpenMx)
#> To take full advantage of multiple cores, use:
#>   mxOption(key='Number of Threads', value=parallel::detectCores()) #now
#>   Sys.setenv(OMP_NUM_THREADS=parallel::detectCores()) #before library(OpenMx)
library(umx)
#> For an overview type '?umx'
#> 
#> Attaching package: 'umx'
#> The following object is masked from 'package:stats':
#> 
#>     loadings
library(R2spa)
set.seed(591028)

Simulate Data

#' Population parameters
num_obs <- 5000  # relatively large sample
cov_xm <- .45
betas <- c(.2, .3, -.4)
# Results seem more accurate with more items
lambdax <- 1.7 * c(.6, .7, .8, .5, .5, .3, .9, .7, .6)  # on normal ogive metric
lambdam <- 1.7 * c(.8, .7, .7, .6, .5)  # on normal ogive metric
lambday <- c(.5, .7, .9)
thresx <- matrix(c(1, 1.5, 1.3, 0.5, 2, -0.5, -1, 0.5, 0.8), nrow = 1)  # binary
thresm <- matrix(c(-2, -2, -0.4, -0.5, 0,
                   -1.5, -0.5, 0, 0, 0.5,
                   -1, 1, 1.5, 0.8, 1), nrow = 3,
                 byrow = TRUE)  # ordinal with 4 categories
inty <- 1.5  # intercept of y
# error variance of y
r2y <- crossprod(
    betas,
    matrix(c(1, cov_xm, 0, cov_xm, 1, 0, 0, 0, 1 + cov_xm^2), nrow = 3)
    ) %*% betas
evar <- as.numeric(1 - r2y)

#' Simulate latent variables X and M with variances of 1, and the disturbance of
#' Y
cov_xm_ey <- matrix(c(1, cov_xm, 0,
                      cov_xm, 1, 0,
                      0, 0, evar), nrow = 3)
eta <- MASS::mvrnorm(num_obs, mu = rep(0, 3), Sigma = cov_xm_ey,
                     empirical = TRUE)
# Add product term
eta <- cbind(eta, eta[, 1] * eta[, 2])

#' Compute latent y (standardized)
etay <- inty + eta[, -3] %*% betas + eta[, 3]
# Verify the structural coefficients with eta known
lm(etay ~ eta[, -3])
#> 
#> Call:
#> lm(formula = etay ~ eta[, -3])
#> 
#> Coefficients:
#> (Intercept)   eta[, -3]1   eta[, -3]2   eta[, -3]3  
#>      1.4938       0.2002       0.2999      -0.3862

#' Simulate y (continuous)
y <- t(
    lambday %*% t(etay) +
        rnorm(num_obs * length(lambday), sd = sqrt(1 - lambday^2))
)

#' Simulate latent continuous responses for X and M
xstar <- eta[, 1] %*% t(lambdax) + rnorm(num_obs * length(lambdax))
mstar <- eta[, 2] %*% t(lambdam) + rnorm(num_obs * length(lambdam))
#' Obtain categorical items
x <- vapply(
    seq_along(lambdax),
    FUN = \(i) {
        findInterval(xstar[, i], thresx[, i])
    },
    FUN.VALUE = numeric(num_obs))
m <- vapply(
    seq_along(lambdam),
    FUN = \(i) {
        findInterval(mstar[, i], thresm[, i])
    },
    FUN.VALUE = numeric(num_obs))

Compute Factor Scores

Y

fsy <- get_fs(data = y, std.lv = TRUE)

X

In this example, we use EAP scores, which are conceptually similar to the regression scores for continuous data.

irtx <- mirt(data.frame(x), itemtype = "2PL",
             verbose = FALSE)  # IRT (2-PL) for x
marginal_rxx(irtx)  # marginal reliability
#> [1] 0.7606752
fsx <- fscores(irtx, full.scores.SE = TRUE)
empirical_rxx(fsx)  # empirical reliability
#>        F1 
#> 0.7732511

M

irtm <- mirt(data.frame(m), itemtype = "graded",
             verbose = FALSE)  # IRT (GRM) for m
marginal_rxx(irtm)  # marginal reliability
#> [1] 0.7931549
fsm <- fscores(irtm, full.scores.SE = TRUE)
empirical_rxx(fsm)  # empirical reliability
#>        F1 
#> 0.7942367

Loading and Error Variances for Product of Factor scores

Let η̃X\tilde \eta_X and η̃M\tilde \eta_M be factor scores of the predictor and the moderator. Then

η̃Xη̃M=(λXηX+eX̃)(λMηM+eM̃)=(λXλMηXηM)+λXηXeM+λMηMeX+eXeM\tilde \eta_X \tilde \eta_M = (\lambda_X \eta_X + e_{\tilde X}) (\lambda_M \eta_M + e_{\tilde M}) = (\lambda_X \lambda_M \eta_X \eta_M) + \lambda_X \eta_X e_M + \lambda_M \eta_M e_X + e_X e_M

So the loading is λXλM\lambda_X \lambda_M, and the error variance for the product indicator is

λX2V(ηX)V(eM)+λM2V(ηM)V(eX)+V(eX)V(eM)\lambda_X^2 V(\eta_X) V(e_M) + \lambda_M^2 V(\eta_M) V(e_X) + V(e_X) V(e_M)

Directly Using Factor Scores

# Assemble data
fs_dat <- data.frame(fsy[c(1, 3, 4)], fsx, fsm) |>
    setNames(c(
        "fsy", "rel_fsy", "ev_fsy",
        "fsx", "se_fsx", "fsm", "se_fsm"
    )) |>
    # Compute reliability; only needed for 2S-PA
    within(expr = {
        rel_fsx <- 1 - se_fsx^2
        rel_fsm <- 1 - se_fsm^2
        ev_fsx <- se_fsx^2 * (1 - se_fsx^2)
        ev_fsm <- se_fsm^2 * (1 - se_fsm^2)
    }) |>
    # Add interaction
    within(expr = {
        fsxm <- fsx * fsm
        ld_fsxm <- rel_fsx * rel_fsm
        ev_fsxm <- rel_fsx^2 * ev_fsm + rel_fsm^2 * ev_fsx + ev_fsm * ev_fsx
    })
lm(fsy ~ fsx * fsm, data = fs_dat)  # some bias
#> 
#> Call:
#> lm(formula = fsy ~ fsx * fsm, data = fs_dat)
#> 
#> Coefficients:
#> (Intercept)          fsx          fsm      fsx:fsm  
#>     0.09867      0.19539      0.25989     -0.35788

Two-Stage Path Analysis

  1. Create OpenMx model without latent variables
fsreg_mx <- mxModel("str",
    type = "RAM",
    manifestVars = c("fsy", "fsx", "fsm", "fsxm"),
    # Path coefficients
    mxPath(
        from = c("fsx", "fsm", "fsxm"), to = "fsy",
        values = 0
    ),
    # Variances
    mxPath(
        from = c("fsy", "fsx", "fsm", "fsxm"),
        to = c("fsy", "fsx", "fsm", "fsxm"),
        arrows = 2,
        values = c(0.6, 1, 1, 1)
    ),
    # Covariances
    mxPath(
        from = c("fsx", "fsm", "fsxm"),
        to = c("fsx", "fsm", "fsxm"),
        arrows = 2, connect = "unique.pairs",
        values = 0
    ),
    mxPath(
        from = "one", to = c("fsy", "fsx", "fsm", "fsxm"),
        values = 0
    )
)
fsreg_umx <- umxLav2RAM(
    "
      fsy ~ b1 * fsx + b2 * fsm + b3 * fsxm
      fsy + fsx + fsm + fsxm ~ 1
      fsx ~~ vx * fsx + fsm + fsxm
      fsm ~~ vm * fsm + fsxm
    ",
    printTab = FALSE)
#> 
#> ?plot.MxModel options: std, means, digits, strip_zero, file, splines=T/F/ortho,..., min=, max =, same = , fixed, resid= 'circle|line|none'
DiagrammeR::grViz(omxGraphviz(fsreg_mx))
#> digraph "str" {
#>   node [style=filled, fontname="Arial", fontsize=16];
#>       /* Manifest Variables */
#>       { rank = max; fsy; fsx; fsm; fsxm }
#>   fsy [shape=square, fillcolor="#a9fab1", height=0.5, width=0.5];
#>   fsx [shape=square, fillcolor="#a9fab1", height=0.5, width=0.5];
#>   fsm [shape=square, fillcolor="#a9fab1", height=0.5, width=0.5];
#>   fsxm [shape=square, fillcolor="#a9fab1", height=0.5, width=0.5];
#> /* Means */
#>   one [shape=triangle];
#> /* Paths */
#>   fsx -> fsy[dir=forward];
#>   fsm -> fsy[dir=forward];
#>   fsxm -> fsy[dir=forward];
#>   fsy -> fsy[dir=both, headport=s, tailport=s];
#>   fsx -> fsx[dir=both, headport=s, tailport=s];
#>   fsx -> fsm[dir=both];
#>   fsx -> fsxm[dir=both];
#>   fsm -> fsm[dir=both, headport=s, tailport=s];
#>   fsm -> fsxm[dir=both];
#>   fsxm -> fsxm[dir=both, headport=s, tailport=s];
#>   one -> fsy[dir=forward];
#>   one -> fsx[dir=forward];
#>   one -> fsm[dir=forward];
#>   one -> fsxm[dir=forward];
#> }
  1. Create loading and error covariance matrices
# Loading
matL <- mxMatrix(
    type = "Diag", nrow = 4, ncol = 4,
    free = FALSE,
    labels = c("data.rel_fsy", "data.rel_fsx", "data.rel_fsm", "data.ld_fsxm"),
    name = "L"
)
# Error
matE <- mxMatrix(
    type = "Diag", nrow = 4, ncol = 4,
    free = FALSE,
    labels = c("data.ev_fsy", "data.ev_fsx", "data.ev_fsm", "data.ev_fsxm"),
    name = "E"
)
  1. Combine (1) and (2), and run 2S-PA
tspa_mx <-
    tspa_mx_model(
        mxModel(fsreg_umx,
                mxAlgebra(b1 * sqrt(vx), name = "stdx_b1"),
                mxAlgebra(b2 * sqrt(vm), name = "stdx_b2"),
                mxAlgebra(b3 * sqrt(vx) * sqrt(vm), name = "stdx_b3"),
                mxCI(c("stdx_b1", "stdx_b2", "stdx_b3"))),
        data = fs_dat,
        mat_ld = matL, mat_ev = matE)
# Run OpenMx
tspa_mx_fit <- mxRun(tspa_mx, intervals = TRUE)
#> Running 2SPAD with 14 parameters
# Summarize the results (takes a minute or so)
# Also include the X-Standardized coefficients
summary(tspa_mx_fit)
#> Summary of 2SPAD 
#>  
#> free parameters:
#>              name matrix  row  col      Estimate  Std.Error A
#> 1              b1   m1.A  fsy  fsx  2.064624e-01 0.01897723  
#> 2              b2   m1.A  fsy  fsm  2.989998e-01 0.01870619  
#> 3              b3   m1.A  fsy fsxm -4.152434e-01 0.01869847  
#> 4    fsy_with_fsy   m1.S  fsy  fsy  6.398847e-01 0.01956885  
#> 5              vx   m1.S  fsx  fsx  9.991969e-01 0.02776582  
#> 6    fsm_with_fsx   m1.S  fsx  fsm  4.455208e-01 0.01927455  
#> 7              vm   m1.S  fsm  fsm  9.996933e-01 0.02685778  
#> 8   fsx_with_fsxm   m1.S  fsx fsxm  2.425816e-02 0.02230417  
#> 9   fsm_with_fsxm   m1.S  fsm fsxm  4.883996e-03 0.02200107  
#> 10 fsxm_with_fsxm   m1.S fsxm fsxm  1.035316e+00 0.03835927  
#> 11     one_to_fsy   m1.M    1  fsy  1.871730e-01 0.01612887  
#> 12     one_to_fsx   m1.M    1  fsx -1.368772e-04 0.01615666  
#> 13     one_to_fsm   m1.M    1  fsm -6.265561e-05 0.01587288  
#> 14    one_to_fsxm   m1.M    1 fsxm  4.506424e-01 0.01832363  
#> 
#> confidence intervals:
#>                     lbound   estimate     ubound note
#> m1.stdx_b1[1,1]  0.1693079  0.2063795  0.2436209     
#> m1.stdx_b2[1,1]  0.2622028  0.2989540  0.3356675     
#> m1.stdx_b3[1,1] -0.4563615 -0.4150129 -0.3757701     
#> 
#> Model Statistics: 
#>                |  Parameters  |  Degrees of Freedom  |  Fit (-2lnL units)
#>        Model:             14                  39986              50641.35
#>    Saturated:             NA                     NA                    NA
#> Independence:             NA                     NA                    NA
#> Number of observations/statistics: 5000/40000
#> 
#> Information Criteria: 
#>       |  df Penalty  |  Parameters Penalty  |  Sample-Size Adjusted
#> AIC:      -29330.65               50669.35                 50669.43
#> BIC:     -289927.14               50760.59                 50716.10
#> CFI: NA 
#> TLI: 1   (also known as NNFI) 
#> RMSEA:  0  [95% CI (NA, NA)]
#> Prob(RMSEA <= 0.05): NA
#> To get additional fit indices, see help(mxRefModels)
#> timestamp: 2024-09-06 04:18:38 
#> Wall clock time: 170.3362 secs 
#> optimizer:  SLSQP 
#> OpenMx version number: 2.21.12 
#> Need help?  See help(mxSummary)
# Standard errors for X-Standardized coefficients
mxSE(m1.stdx_b1, tspa_mx_fit)
#> Treating first argument as an expression
#>            [,1]
#> [1,] 0.01891419
mxSE(m1.stdx_b2, tspa_mx_fit)
#> Treating first argument as an expression
#>            [,1]
#> [1,] 0.01868014
mxSE(m1.stdx_b3, tspa_mx_fit)
#> Treating first argument as an expression
#>            [,1]
#> [1,] 0.02048697