EFA Scores
Mark Lai
2024-09-06
efa-score.Rmd
This vignette demonstrates how to use the
compute_fscore()
function to calculate factor scores based
on exploratory factor analysis, and compare the results to those
calculated by the psych::factor.scores()
function.
library(psych)
data(bfi, package = "psych")
library(lavaan)
#> This is lavaan 0.6-18
#> lavaan is FREE software! Please report any bugs.
#>
#> Attaching package: 'lavaan'
#> The following object is masked from 'package:psych':
#>
#> cor2cov
Example: Big Five Inventory
# Covariance (with FIML)
corr_bfi <- lavCor(bfi[1:25], missing = "fiml")
# EFA (Target rotation)
target_mat_bfi <- matrix(0, nrow = 25, ncol = 5)
target_mat_bfi[1:5, 1] <- NA
target_mat_bfi[6:10, 2] <- NA
target_mat_bfi[11:15, 3] <- NA
target_mat_bfi[16:20, 4] <- NA
target_mat_bfi[21:25, 5] <- NA
fa_target_bfi <- psych::fa(
corr_bfi, n.obs = 2436,
nfactors = 5,
rotate = "targetQ", Target = target_mat_bfi,
scores = "Bartlett")
#> Loading required namespace: GPArotation
# Factor correlations
fa_target_bfi$Phi |>
(`[`)(paste0("MR", 1:5), paste0("MR", 1:5)) |>
knitr::kable(digits = 2, caption = "EFA Factor Correlation")
MR1 | MR2 | MR3 | MR4 | MR5 | |
---|---|---|---|---|---|
MR1 | 1.00 | 0.22 | 0.35 | -0.07 | 0.15 |
MR2 | 0.22 | 1.00 | 0.27 | -0.20 | 0.20 |
MR3 | 0.35 | 0.27 | 1.00 | -0.20 | 0.16 |
MR4 | -0.07 | -0.20 | -0.20 | 1.00 | -0.01 |
MR5 | 0.15 | 0.20 | 0.16 | -0.01 | 1.00 |
# Correlation with sum scores
bfi |>
transform(A = (7 - A1) + A2 + A3 + A4 + A5,
C = C1 + C2 + C3 + (7 - C4) + (7 - C5),
E = (7 - E1) + (7 - E2) + E3 + E4 + E5,
N = N1 + N2 + N3 + N4 + N5,
O = O1 + (7 - O2) + O3 + O4 + (7 - O5)) |>
subset(select = A:O) |>
cor(use = "complete") |>
knitr::kable(digits = 2, caption = "Sum scores")
A | C | E | N | O | |
---|---|---|---|---|---|
A | 1.00 | 0.26 | 0.47 | -0.19 | 0.14 |
C | 0.26 | 1.00 | 0.27 | -0.23 | 0.19 |
E | 0.47 | 0.27 | 1.00 | -0.23 | 0.22 |
N | -0.19 | -0.23 | -0.23 | 1.00 | -0.08 |
O | 0.14 | 0.19 | 0.22 | -0.08 | 1.00 |
Hand calculate the Bartlett scores using weights
# Bartlett score for first person
bscores <-
psych::factor.scores(bfi[, 1:25], f = fa_target_bfi,
method = "Bartlett")
fa_target_bfi$weights
#> MR4 MR3 MR2 MR1 MR5
#> A1 0.019379345 0.088570322 0.037984352 -0.192609241 -0.0446095141
#> A2 0.039471717 -0.058827607 0.016480405 0.387253874 0.0174635927
#> A3 0.038138482 -0.002491405 -0.018448555 0.443385489 -0.0008791950
#> A4 0.008949806 -0.011419400 0.074609038 0.196183077 -0.1058123616
#> A5 -0.007115326 0.064563350 -0.028894457 0.297710866 -0.0033648030
#> C1 0.025793549 -0.013945609 0.252251588 -0.025663593 0.0878936560
#> C2 0.067216606 -0.054276500 0.383692956 0.040736916 0.0208035616
#> C3 0.023175743 -0.039728147 0.266210191 0.042308787 -0.0532943534
#> C4 0.045093233 0.017707633 -0.349869186 0.042230966 -0.0202260355
#> C5 0.055608096 -0.047779140 -0.303202360 0.040826161 0.1037402597
#> E1 0.002492943 -0.234998207 0.069439907 0.033510757 -0.0074443287
#> E2 0.069816249 -0.398147198 0.017193600 0.096706957 0.0534356364
#> E3 0.025889788 0.192977816 -0.031642142 0.069479646 0.1645920884
#> E4 -0.004903761 0.315619052 -0.010031608 0.099826497 -0.1699081214
#> E5 0.034244648 0.197071361 0.128843720 -0.049763160 0.0903700702
#> N1 0.376489849 0.174004405 0.036365710 -0.153681565 -0.0900410369
#> N2 0.315648329 0.098517700 0.033517347 -0.103972896 0.0008553925
#> N3 0.274596160 -0.019282873 -0.009840675 0.065937424 0.0331731810
#> N4 0.182196509 -0.186091836 -0.069870228 0.123634306 0.1272479199
#> N5 0.144850775 -0.078929736 0.008603568 0.133251027 -0.0834902117
#> O1 0.007965717 0.045782965 0.014054766 -0.025087007 0.3135869739
#> O2 0.045459925 0.014231220 -0.021761255 0.082163004 -0.2754285178
#> O3 0.016818868 0.080919045 -0.024446602 -0.001049249 0.4783313815
#> O4 0.052204814 -0.121040967 -0.019643988 0.103037548 0.2512799168
#> O5 0.024318925 0.034957969 0.005514044 0.026713299 -0.3498465254
# Calculation by hand
y1 <- scale(bfi[, 1:25])[1, ] # z-score
crossprod(fa_target_bfi$weights, as.matrix(y1))
#> [,1]
#> MR4 -0.379596163
#> MR3 0.008672754
#> MR2 -1.662210036
#> MR1 -1.056626928
#> MR5 -2.263921509
# Compare to results from psych::fa()
bscores$scores[1, ]
#> MR4 MR3 MR2 MR1 MR5
#> -0.379596163 0.008672754 -1.662210036 -1.056626928 -2.263921509
# Covariance of Bartlett scores
cov(bscores$scores, use = "complete") |>
(`[`)(paste0("MR", 1:5), paste0("MR", 1:5)) |>
knitr::kable(digits = 2, caption = "With Bartlett scores")
MR1 | MR2 | MR3 | MR4 | MR5 | |
---|---|---|---|---|---|
MR1 | 1.39 | 0.20 | 0.30 | -0.06 | 0.13 |
MR2 | 0.20 | 1.35 | 0.28 | -0.19 | 0.18 |
MR3 | 0.30 | 0.28 | 1.28 | -0.22 | 0.14 |
MR4 | -0.06 | -0.19 | -0.22 | 1.18 | 0.01 |
MR5 | 0.13 | 0.18 | 0.14 | 0.01 | 1.45 |
Using compute_fscore()
and perform a two-stage
analysis
# Obtain error covariances
yc <- scale(bfi[, 1:25])
yc <- yc[complete.cases(yc), ]
lam <- fa_target_bfi$loadings
colnames(lam) <- c("N", "E", "C", "A", "O")
phi <- fa_target_bfi$Phi
th <- diag(fa_target_bfi$uniquenesses)
# # scoring weights
# a <- solve(crossprod(lam, solve(th, lam)), t(solve(th, lam)))
# ecov_fs <- a %*% th %*% t(a)
# dimnames(ecov_fs) <- rep(list(c("N", "E", "C", "A", "O")), 2)
# Two-stage analysis
library(R2spa)
bfi_fs <- compute_fscore(yc, lambda = lam, theta = th,
method = "Bartlett", center_y = FALSE,
fs_matrices = TRUE)
head(bfi_fs)
#> N E C A O
#> 61617 -0.37959616 0.008672754 -1.66221004 -1.05662693 -2.2639215
#> 61618 0.05502992 0.657251998 -0.81281047 -0.21048629 -0.3557784
#> 61620 0.66912918 0.351719435 -0.00993787 -0.96855007 0.3107586
#> 61621 -0.14122632 -0.048191450 -1.35947549 0.01849824 -1.5743464
#> 61622 -0.36044079 0.557318357 -0.07396380 -1.02840458 -1.0629543
#> 61623 0.18540827 1.461174698 1.82621364 0.11707095 0.5289163
# Scoring matrix
attr(bfi_fs, which = "scoring_matrix")
#> [,1] [,2] [,3] [,4] [,5] [,6]
#> N 0.01937935 0.03947172 0.038138482 0.008949806 -0.007115326 0.02579355
#> E 0.08857032 -0.05882761 -0.002491405 -0.011419400 0.064563350 -0.01394561
#> C 0.03798435 0.01648041 -0.018448555 0.074609038 -0.028894457 0.25225159
#> A -0.19260924 0.38725387 0.443385489 0.196183077 0.297710866 -0.02566359
#> O -0.04460951 0.01746359 -0.000879195 -0.105812362 -0.003364803 0.08789366
#> [,7] [,8] [,9] [,10] [,11] [,12]
#> N 0.06721661 0.02317574 0.04509323 0.05560810 0.002492943 0.06981625
#> E -0.05427650 -0.03972815 0.01770763 -0.04777914 -0.234998207 -0.39814720
#> C 0.38369296 0.26621019 -0.34986919 -0.30320236 0.069439907 0.01719360
#> A 0.04073692 0.04230879 0.04223097 0.04082616 0.033510757 0.09670696
#> O 0.02080356 -0.05329435 -0.02022604 0.10374026 -0.007444329 0.05343564
#> [,13] [,14] [,15] [,16] [,17] [,18]
#> N 0.02588979 -0.004903761 0.03424465 0.37648985 0.3156483287 0.274596160
#> E 0.19297782 0.315619052 0.19707136 0.17400440 0.0985176996 -0.019282873
#> C -0.03164214 -0.010031608 0.12884372 0.03636571 0.0335173471 -0.009840675
#> A 0.06947965 0.099826497 -0.04976316 -0.15368157 -0.1039728964 0.065937424
#> O 0.16459209 -0.169908121 0.09037007 -0.09004104 0.0008553925 0.033173181
#> [,19] [,20] [,21] [,22] [,23] [,24]
#> N 0.18219651 0.144850775 0.007965717 0.04545993 0.016818868 0.05220481
#> E -0.18609184 -0.078929736 0.045782965 0.01423122 0.080919045 -0.12104097
#> C -0.06987023 0.008603568 0.014054766 -0.02176125 -0.024446602 -0.01964399
#> A 0.12363431 0.133251027 -0.025087007 0.08216300 -0.001049249 0.10303755
#> O 0.12724792 -0.083490212 0.313586974 -0.27542852 0.478331381 0.25127992
#> [,25]
#> N 0.024318925
#> E 0.034957969
#> C 0.005514044
#> A 0.026713299
#> O -0.349846525
# Error covariance
attr(bfi_fs, which = "fsT")
#> fs_N fs_E fs_C fs_A fs_O
#> fs_N 0.169330114 -0.005940885 0.008966234 0.02689064 0.006632488
#> fs_E -0.005940885 0.267237548 -0.008256099 -0.06840869 -0.028478148
#> fs_C 0.008966234 -0.008256099 0.316471763 -0.01719329 -0.016556329
#> fs_A 0.026890643 -0.068408687 -0.017193295 0.34849177 -0.010843643
#> fs_O 0.006632488 -0.028478148 -0.016556329 -0.01084364 0.454791519
Recover factor covariances with 2S-PA
ts_fit <- tspa("",
data = data.frame(bscores$scores) |>
setNames(c("fs_N", "fs_E", "fs_C", "fs_A", "fs_O")),
fsT = attr(bfi_fs, which = "fsT"),
fsL = diag(5) |>
`dimnames<-`(list(c("fs_N", "fs_E", "fs_C", "fs_A", "fs_O"),
c("N", "E", "C", "A", "O"))))
lavInspect(ts_fit, what = "cor.lv") |>
(`[`)(c("A", "C", "E", "N", "O"), c("A", "C", "E", "N", "O")) |>
knitr::kable(digits = 2, caption = "With Bartlett scores and 2S-PA")
A | C | E | N | O | |
---|---|---|---|---|---|
A | 1.00 | 0.21 | 0.36 | -0.08 | 0.13 |
C | 0.21 | 1.00 | 0.28 | -0.20 | 0.19 |
E | 0.36 | 0.28 | 1.00 | -0.21 | 0.17 |
N | -0.08 | -0.20 | -0.21 | 1.00 | 0.00 |
O | 0.13 | 0.19 | 0.17 | 0.00 | 1.00 |