Skip to contents

The example is from https://lavaan.ugent.be/tutorial/sem.html.

Factor score

For a CFA model with multiple latent factors, even when each indicator only loads on one factor, the resulting factor scores generally are weighted composites of ALL indicators. Consider the regression score, which has the form

𝛈̃=𝐀(𝐲𝛍̂)+𝛂\tilde{\boldsymbol \eta} = \mathbf{A}(\mathbf{y} - \hat{\boldsymbol \mu}) + \boldsymbol{\alpha}

where 𝐀=𝚿𝚲𝚺̂1\mathbf{A} = \boldsymbol{\Psi}\boldsymbol{\Lambda}^\top \hat{\boldsymbol{\Sigma}}^{-1} is a qq×\timespp matrix, 𝛍̂=𝛎+𝚲𝛂\hat{\boldsymbol \mu} = \boldsymbol{\nu} + \boldsymbol{\Lambda} \boldsymbol{\alpha} and 𝚺=𝚲𝚿𝚲+𝚯\boldsymbol{\Sigma} = \boldsymbol{\Lambda} \boldsymbol{\Psi}\boldsymbol{\Lambda}^\top + \boldsymbol{\Theta} are the model-implied means and covariances of the indicators 𝐲\mathbf{y}, and 𝛂\boldsymbol{\alpha} and 𝚿\boldsymbol{\Psi} are the latent means and latent covariances.

Therefore, assuming that the model is correctly specified such that 𝐲=𝛎+𝚲𝛈+𝛆\mathbf{y} = \boldsymbol{\nu} + \boldsymbol{\Lambda} \boldsymbol{\eta} + \boldsymbol{\varepsilon},

𝛈̃=𝐀(𝚲𝛈+𝛆𝚲𝛂)+𝛂=(𝐈𝐀𝚲)𝛂+𝐀𝚲𝛈+𝐀𝛆. \begin{aligned} \tilde{\boldsymbol \eta} & = \mathbf{A}(\boldsymbol{\Lambda} \boldsymbol{\eta} + \boldsymbol{\varepsilon} - \boldsymbol{\Lambda} \boldsymbol{\alpha}) + \boldsymbol{\alpha} \\ & = (\mathbf{I} - \mathbf{A}\boldsymbol{\Lambda}) \boldsymbol{\alpha} + \mathbf{A}\boldsymbol{\Lambda} \boldsymbol{\eta} + \mathbf{A}\boldsymbol{\varepsilon}. \end{aligned}

If we consider 𝛈̃\tilde{\boldsymbol \eta} as indicators of 𝛈\boldsymbol \eta, we can see that 𝛎𝛈̃=(𝐈𝐀𝚲)𝛂\boldsymbol{\nu}_\tilde{\boldsymbol{\eta}} = (\mathbf{I} - \mathbf{A}\boldsymbol{\Lambda}) \boldsymbol{\alpha} is the intercept, 𝚲𝛈̃=𝐀𝚲\boldsymbol{\Lambda}_\tilde{\boldsymbol{\eta}} = \mathbf{A}\boldsymbol{\Lambda} is the qq×\timesqq loading matrix, and 𝚯𝛈̃=𝐀𝛆\boldsymbol{\Theta}_\tilde{\boldsymbol{\eta}} = \mathbf{A}\boldsymbol{\varepsilon} is the error covariance matrix.

We can see that 𝚲𝛈̃\boldsymbol{\Lambda}_\tilde{\boldsymbol{\eta}} is generally not diagonal, as the following shows:

# CFA
my_cfa <- "
  # latent variables
    ind60 =~ x1 + x2 + x3
    dem60 =~ y1 + y2 + y3 + y4
"
cfa_fit <- cfa(model = my_cfa,
               data  = PoliticalDemocracy,
               std.lv = TRUE)
# A matrix
pars <- lavInspect(cfa_fit, what = "est")
lambda_mat <- pars$lambda
psi_mat <- pars$psi
sigma_mat <- cfa_fit@implied$cov[[1]]
ginvsigma <- MASS::ginv(sigma_mat)
alambda <- psi_mat %*% crossprod(lambda_mat, ginvsigma %*% lambda_mat)
alambda
#>            ind60      dem60
#> ind60 0.95538579 0.01834111
#> dem60 0.05816994 0.86888887

We can also use R2spa::get_fs():

(fs_dat <- get_fs(PoliticalDemocracy, model = my_cfa, std.lv = TRUE)) |> head()
#>     fs_ind60   fs_dem60 fs_ind60_se fs_dem60_se ind60_by_fs_ind60
#> 1 -0.8101568 -1.3119114   0.1859987   0.3012901         0.9553858
#> 2  0.1888466 -1.3644831   0.1859987   0.3012901         0.9553858
#> 3  1.0960931  1.3107705   0.1859987   0.3012901         0.9553858
#> 4  1.8702043  1.4849083   0.1859987   0.3012901         0.9553858
#> 5  1.2446060  0.9193277   0.1859987   0.3012901         0.9553858
#> 6  0.3348621  0.4886331   0.1859987   0.3012901         0.9553858
#>   ind60_by_fs_dem60 dem60_by_fs_ind60 dem60_by_fs_dem60 ev_fs_ind60
#> 1        0.05816994        0.01834111         0.8688889  0.03459552
#> 2        0.05816994        0.01834111         0.8688889  0.03459552
#> 3        0.05816994        0.01834111         0.8688889  0.03459552
#> 4        0.05816994        0.01834111         0.8688889  0.03459552
#> 5        0.05816994        0.01834111         0.8688889  0.03459552
#> 6        0.05816994        0.01834111         0.8688889  0.03459552
#>   ecov_fs_dem60_fs_ind60 ev_fs_dem60
#> 1            0.004017388  0.09077571
#> 2            0.004017388  0.09077571
#> 3            0.004017388  0.09077571
#> 4            0.004017388  0.09077571
#> 5            0.004017388  0.09077571
#> 6            0.004017388  0.09077571

Therefore, we need to specify cross-loadings when using 2S-PA.

tspa_fit <- tspa(model = "dem60 ~ ind60", data = fs_dat,
                 fsT = attr(fs_dat, "fsT"), 
                 fsL = attr(fs_dat, "fsL"))
cat(attr(tspa_fit, "tspaModel"))
#> # latent variables (indicated by factor scores)
#>  ind60 =~ c(0.955385785456617) * fs_ind60 + c(0.0581699407736295) * fs_dem60
#>  # latent variables (indicated by factor scores)
#>  dem60 =~ c(0.0183411131911887) * fs_ind60 + c(0.868888868390616) * fs_dem60
#>  # constrain the errors
#> fs_ind60 ~~ c(0.0345955179271981) * fs_ind60
#>  # constrain the errors
#> fs_dem60 ~~ c(0.00401738814566851) * fs_ind60
#>  # constrain the errors
#> fs_dem60 ~~ c(0.0907757082269278) * fs_dem60
#>  
#>  # structural model
#>  dem60 ~ ind60
parameterestimates(tspa_fit)
#>         lhs op      rhs   est    se     z pvalue ci.lower ci.upper
#> 1     ind60 =~ fs_ind60 0.955 0.000    NA     NA    0.955    0.955
#> 2     ind60 =~ fs_dem60 0.058 0.000    NA     NA    0.058    0.058
#> 3     dem60 =~ fs_ind60 0.018 0.000    NA     NA    0.018    0.018
#> 4     dem60 =~ fs_dem60 0.869 0.000    NA     NA    0.869    0.869
#> 5  fs_ind60 ~~ fs_ind60 0.035 0.000    NA     NA    0.035    0.035
#> 6  fs_ind60 ~~ fs_dem60 0.004 0.000    NA     NA    0.004    0.004
#> 7  fs_dem60 ~~ fs_dem60 0.091 0.000    NA     NA    0.091    0.091
#> 8     dem60  ~    ind60 0.460 0.113 4.089      0    0.240    0.681
#> 9     ind60 ~~    ind60 1.000 0.169 5.900      0    0.668    1.332
#> 10    dem60 ~~    dem60 0.788 0.150 5.267      0    0.495    1.081

which is more consistent with the SEM results.

Three-factor model example

# CFA
cfa_3fac <-  "
  # latent variables
    ind60 =~ x1 + x2 + x3
    dem60 =~ y1 + y2 + y3 + y4
    dem65 =~ y5 + y6 + y7 + y8
"
cfa_3fac_fit <- cfa(model = cfa_3fac,
                    data  = PoliticalDemocracy,
                    std.lv = TRUE)
# A matrix
pars <- lavInspect(cfa_3fac_fit, what = "est")
lambda_mat <- pars$lambda
psi_mat <- pars$psi
sigma_mat <- cfa_3fac_fit@implied$cov[[1]]
ginvsigma <- MASS::ginv(sigma_mat)
alambda <- psi_mat %*% crossprod(lambda_mat, ginvsigma %*% lambda_mat)
alambda
#>             ind60        dem60      dem65
#> ind60  0.95064774 -0.005967124 0.02951139
#> dem60 -0.02069724  0.533020047 0.41603787
#> dem65  0.09868528  0.401095944 0.49133377

We can also use R2spa::get_fs():

(fs_dat_3fac <- get_fs(PoliticalDemocracy, model = cfa_3fac, std.lv = TRUE)) |>
  head()
#>     fs_ind60   fs_dem60    fs_dem65 fs_ind60_se fs_dem60_se fs_dem65_se
#> 1 -0.7990475 -1.1571745 -1.13720655   0.1844193   0.2422541   0.2270976
#> 2  0.2152486 -1.0238236 -0.80871922   0.1844193   0.2422541   0.2270976
#> 3  1.1028297  1.3890842  1.46389950   0.1844193   0.2422541   0.2270976
#> 4  1.8585004  1.3163888  1.43045560   0.1844193   0.2422541   0.2270976
#> 5  1.2432985  0.9522026  1.03348589   0.1844193   0.2422541   0.2270976
#> 6  0.3067994  0.1109206  0.05023217   0.1844193   0.2422541   0.2270976
#>   ind60_by_fs_ind60 ind60_by_fs_dem60 ind60_by_fs_dem65 dem60_by_fs_ind60
#> 1         0.9506477       -0.02069724        0.09868528      -0.005967124
#> 2         0.9506477       -0.02069724        0.09868528      -0.005967124
#> 3         0.9506477       -0.02069724        0.09868528      -0.005967124
#> 4         0.9506477       -0.02069724        0.09868528      -0.005967124
#> 5         0.9506477       -0.02069724        0.09868528      -0.005967124
#> 6         0.9506477       -0.02069724        0.09868528      -0.005967124
#>   dem60_by_fs_dem60 dem60_by_fs_dem65 dem65_by_fs_ind60 dem65_by_fs_dem60
#> 1           0.53302         0.4010959        0.02951139         0.4160379
#> 2           0.53302         0.4010959        0.02951139         0.4160379
#> 3           0.53302         0.4010959        0.02951139         0.4160379
#> 4           0.53302         0.4010959        0.02951139         0.4160379
#> 5           0.53302         0.4010959        0.02951139         0.4160379
#> 6           0.53302         0.4010959        0.02951139         0.4160379
#>   dem65_by_fs_dem65 ev_fs_ind60 ecov_fs_dem60_fs_ind60 ev_fs_dem60
#> 1         0.4913338   0.0340105           0.0003881641  0.05868703
#> 2         0.4913338   0.0340105           0.0003881641  0.05868703
#> 3         0.4913338   0.0340105           0.0003881641  0.05868703
#> 4         0.4913338   0.0340105           0.0003881641  0.05868703
#> 5         0.4913338   0.0340105           0.0003881641  0.05868703
#> 6         0.4913338   0.0340105           0.0003881641  0.05868703
#>   ecov_fs_dem65_fs_ind60 ecov_fs_dem65_fs_dem60 ev_fs_dem65
#> 1            0.005026024             0.05337525   0.0515733
#> 2            0.005026024             0.05337525   0.0515733
#> 3            0.005026024             0.05337525   0.0515733
#> 4            0.005026024             0.05337525   0.0515733
#> 5            0.005026024             0.05337525   0.0515733
#> 6            0.005026024             0.05337525   0.0515733

Therefore, we need to specify cross-loadings when using 2S-PA.

tspa_fit_3fac <- tspa(model = "dem60 ~ ind60
              dem65 ~ ind60 + dem60",
              data = fs_dat_3fac,
              fsT = attr(fs_dat_3fac, "fsT"),
              fsL = attr(fs_dat_3fac, "fsL"))
cat(attr(tspa_fit_3fac, "tspaModel"))
#> # latent variables (indicated by factor scores)
#>  ind60 =~ c(0.950647742844847) * fs_ind60 + c(-0.0206972362902851) * fs_dem60 + c(0.0986852834195767) * fs_dem65
#>  # latent variables (indicated by factor scores)
#>  dem60 =~ c(-0.00596712444175395) * fs_ind60 + c(0.533020047181513) * fs_dem60 + c(0.401095943690546) * fs_dem65
#>  # latent variables (indicated by factor scores)
#>  dem65 =~ c(0.0295113941487134) * fs_ind60 + c(0.416037872255944) * fs_dem60 + c(0.491333767947424) * fs_dem65
#>  # constrain the errors
#> fs_ind60 ~~ c(0.0340104951546674) * fs_ind60
#>  # constrain the errors
#> fs_dem60 ~~ c(0.00038816407039879) * fs_ind60
#>  # constrain the errors
#> fs_dem65 ~~ c(0.00502602420598866) * fs_ind60
#>  # constrain the errors
#> fs_dem60 ~~ c(0.0586870313996517) * fs_dem60
#>  # constrain the errors
#> fs_dem65 ~~ c(0.0533752457536215) * fs_dem60
#>  # constrain the errors
#> fs_dem65 ~~ c(0.0515732993693881) * fs_dem65
#>  
#>  # structural model
#>  dem60 ~ ind60
#>               dem65 ~ ind60 + dem60
parameterestimates(tspa_fit_3fac)
#>         lhs op      rhs    est    se      z pvalue ci.lower ci.upper
#> 1     ind60 =~ fs_ind60  0.951 0.000     NA     NA    0.951    0.951
#> 2     ind60 =~ fs_dem60 -0.021 0.000     NA     NA   -0.021   -0.021
#> 3     ind60 =~ fs_dem65  0.099 0.000     NA     NA    0.099    0.099
#> 4     dem60 =~ fs_ind60 -0.006 0.000     NA     NA   -0.006   -0.006
#> 5     dem60 =~ fs_dem60  0.533 0.000     NA     NA    0.533    0.533
#> 6     dem60 =~ fs_dem65  0.401 0.000     NA     NA    0.401    0.401
#> 7     dem65 =~ fs_ind60  0.030 0.000     NA     NA    0.030    0.030
#> 8     dem65 =~ fs_dem60  0.416 0.000     NA     NA    0.416    0.416
#> 9     dem65 =~ fs_dem65  0.491 0.000     NA     NA    0.491    0.491
#> 10 fs_ind60 ~~ fs_ind60  0.034 0.000     NA     NA    0.034    0.034
#> 11 fs_ind60 ~~ fs_dem60  0.000 0.000     NA     NA    0.000    0.000
#> 12 fs_ind60 ~~ fs_dem65  0.005 0.000     NA     NA    0.005    0.005
#> 13 fs_dem60 ~~ fs_dem60  0.059 0.000     NA     NA    0.059    0.059
#> 14 fs_dem60 ~~ fs_dem65  0.053 0.000     NA     NA    0.053    0.053
#> 15 fs_dem65 ~~ fs_dem65  0.052 0.000     NA     NA    0.052    0.052
#> 16    dem60  ~    ind60  0.448 0.114  3.937  0.000    0.225    0.671
#> 17    dem65  ~    ind60  0.146 0.069  2.112  0.035    0.010    0.281
#> 18    dem65  ~    dem60  0.913 0.073 12.435  0.000    0.769    1.057
#> 19    ind60 ~~    ind60  1.000 0.169  5.902  0.000    0.668    1.332
#> 20    dem60 ~~    dem60  0.799 0.153  5.224  0.000    0.499    1.099
#> 21    dem65 ~~    dem65  0.026 0.043  0.620  0.535   -0.057    0.110

Compare to SEM:

sem_3fac <- sem("
  # latent variables
    ind60 =~ x1 + x2 + x3
    dem60 =~ y1 + y2 + y3 + y4
    dem65 =~ y5 + y6 + y7 + y8
  # structural model
    dem60 ~ ind60
    dem65 ~ ind60 + dem60
  ",
  data = PoliticalDemocracy
)
standardizedSolution(sem_3fac)
#>      lhs op   rhs est.std    se      z pvalue ci.lower ci.upper
#> 1  ind60 =~    x1   0.920 0.023 39.823  0.000    0.874    0.965
#> 2  ind60 =~    x2   0.973 0.017 58.858  0.000    0.941    1.006
#> 3  ind60 =~    x3   0.872 0.031 28.034  0.000    0.811    0.933
#> 4  dem60 =~    y1   0.845 0.039 21.698  0.000    0.769    0.921
#> 5  dem60 =~    y2   0.760 0.054 14.142  0.000    0.655    0.866
#> 6  dem60 =~    y3   0.705 0.063 11.225  0.000    0.582    0.828
#> 7  dem60 =~    y4   0.860 0.036 23.650  0.000    0.789    0.931
#> 8  dem65 =~    y5   0.803 0.046 17.602  0.000    0.714    0.893
#> 9  dem65 =~    y6   0.783 0.049 15.918  0.000    0.687    0.879
#> 10 dem65 =~    y7   0.819 0.043 19.122  0.000    0.735    0.903
#> 11 dem65 =~    y8   0.847 0.038 22.389  0.000    0.773    0.921
#> 12 dem60  ~ ind60   0.448 0.102  4.393  0.000    0.248    0.648
#> 13 dem65  ~ ind60   0.146 0.070  2.071  0.038    0.008    0.283
#> 14 dem65  ~ dem60   0.913 0.048 19.120  0.000    0.819    1.006
#> 15    x1 ~~    x1   0.154 0.042  3.636  0.000    0.071    0.238
#> 16    x2 ~~    x2   0.053 0.032  1.634  0.102   -0.010    0.116
#> 17    x3 ~~    x3   0.240 0.054  4.417  0.000    0.133    0.346
#> 18    y1 ~~    y1   0.286 0.066  4.348  0.000    0.157    0.415
#> 19    y2 ~~    y2   0.422 0.082  5.166  0.000    0.262    0.582
#> 20    y3 ~~    y3   0.503 0.089  5.676  0.000    0.329    0.676
#> 21    y4 ~~    y4   0.261 0.063  4.173  0.000    0.138    0.383
#> 22    y5 ~~    y5   0.355 0.073  4.842  0.000    0.211    0.499
#> 23    y6 ~~    y6   0.387 0.077  5.024  0.000    0.236    0.538
#> 24    y7 ~~    y7   0.329 0.070  4.696  0.000    0.192    0.467
#> 25    y8 ~~    y8   0.283 0.064  4.416  0.000    0.157    0.408
#> 26 ind60 ~~ ind60   1.000 0.000     NA     NA    1.000    1.000
#> 27 dem60 ~~ dem60   0.799 0.091  8.737  0.000    0.620    0.978
#> 28 dem65 ~~ dem65   0.026 0.046  0.579  0.562   -0.063    0.116