gpcfa-examples

Note: the estimation process can be time-consuming, depending on the available computing power. You can save some time by reducing the length of the chains.

Categorical Data with Missingness but no Local Dependence:

  1. Load the package, obtain the data and the loading pattern (qlam), and set up the design matrix Q.
# Attach LAWBL and pull the response matrix out of the sim18ccfa40 example object.
library("LAWBL")
dat <- sim18ccfa40[["dat"]]
dim(dat)
R> [1] 1000   18
# Per-item summaries; the NA's row reports the number of missing responses per item.
summary(dat) # about 10% of the responses are missing at random
R>        V1              V2              V3              V4       
R>  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
R>  1st Qu.:2.000   1st Qu.:2.000   1st Qu.:2.000   1st Qu.:2.000  
R>  Median :2.500   Median :3.000   Median :3.000   Median :3.000  
R>  Mean   :2.486   Mean   :2.518   Mean   :2.503   Mean   :2.498  
R>  3rd Qu.:3.000   3rd Qu.:3.000   3rd Qu.:3.000   3rd Qu.:3.000  
R>  Max.   :4.000   Max.   :4.000   Max.   :4.000   Max.   :4.000  
R>  NA's   :100     NA's   :114     NA's   :99      NA's   :109    
R>        V5              V6              V7              V8       
R>  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
R>  1st Qu.:2.000   1st Qu.:2.000   1st Qu.:2.000   1st Qu.:2.000  
R>  Median :3.000   Median :3.000   Median :2.000   Median :3.000  
R>  Mean   :2.498   Mean   :2.502   Mean   :2.473   Mean   :2.504  
R>  3rd Qu.:3.000   3rd Qu.:3.000   3rd Qu.:3.000   3rd Qu.:3.000  
R>  Max.   :4.000   Max.   :4.000   Max.   :4.000   Max.   :4.000  
R>  NA's   :100     NA's   :94      NA's   :110     NA's   :106    
R>        V9             V10             V11             V12       
R>  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
R>  1st Qu.:2.000   1st Qu.:2.000   1st Qu.:2.000   1st Qu.:2.000  
R>  Median :3.000   Median :3.000   Median :2.000   Median :2.000  
R>  Mean   :2.493   Mean   :2.492   Mean   :2.483   Mean   :2.501  
R>  3rd Qu.:3.000   3rd Qu.:3.000   3rd Qu.:3.000   3rd Qu.:3.000  
R>  Max.   :4.000   Max.   :4.000   Max.   :4.000   Max.   :4.000  
R>  NA's   :101     NA's   :103     NA's   :105     NA's   :95     
R>       V13             V14             V15             V16       
R>  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
R>  1st Qu.:2.000   1st Qu.:2.000   1st Qu.:2.000   1st Qu.:2.000  
R>  Median :2.000   Median :2.000   Median :3.000   Median :2.000  
R>  Mean   :2.477   Mean   :2.488   Mean   :2.517   Mean   :2.483  
R>  3rd Qu.:3.000   3rd Qu.:3.000   3rd Qu.:3.000   3rd Qu.:3.000  
R>  Max.   :4.000   Max.   :4.000   Max.   :4.000   Max.   :4.000  
R>  NA's   :94      NA's   :121     NA's   :93      NA's   :103    
R>       V17             V18       
R>  Min.   :1.000   Min.   :1.000  
R>  1st Qu.:2.000   1st Qu.:2.000  
R>  Median :2.500   Median :3.000  
R>  Mean   :2.498   Mean   :2.502  
R>  3rd Qu.:3.000   3rd Qu.:3.000  
R>  Max.   :4.000   Max.   :4.000  
R>  NA's   :118     NA's   :99
# Loading pattern stored alongside the data (items in rows, factors in columns).
qlam <- sim18ccfa40[["qlam"]]
J <- ncol(dat) # number of items
K <- 3 # number of factors
qlam
R>       [,1] [,2] [,3]
R>  [1,]  0.7  0.0  0.0
R>  [2,]  0.7  0.0  0.0
R>  [3,]  0.7  0.0  0.0
R>  [4,]  0.7  0.0  0.0
R>  [5,]  0.7  0.4  0.0
R>  [6,]  0.7  0.4  0.0
R>  [7,]  0.0  0.7  0.0
R>  [8,]  0.0  0.7  0.0
R>  [9,]  0.0  0.7  0.0
R> [10,]  0.0  0.7  0.0
R> [11,]  0.0  0.7  0.4
R> [12,]  0.0  0.7  0.4
R> [13,]  0.0  0.0  0.7
R> [14,]  0.0  0.0  0.7
R> [15,]  0.0  0.0  0.7
R> [16,]  0.0  0.0  0.7
R> [17,]  0.4  0.0  0.7
R> [18,]  0.4  0.0  0.7

# Design matrix for the E-step: every loading starts as unspecified (-1) ...
Q <- matrix(-1, nrow = J, ncol = K)
# ... and two items per factor are then marked as specified (1).
Q[13:14, 3] <- 1
Q[7:8, 2] <- 1
Q[1:2, 1] <- 1
Q
R>       [,1] [,2] [,3]
R>  [1,]    1   -1   -1
R>  [2,]    1   -1   -1
R>  [3,]   -1   -1   -1
R>  [4,]   -1   -1   -1
R>  [5,]   -1   -1   -1
R>  [6,]   -1   -1   -1
R>  [7,]   -1    1   -1
R>  [8,]   -1    1   -1
R>  [9,]   -1   -1   -1
R> [10,]   -1   -1   -1
R> [11,]   -1   -1   -1
R> [12,]   -1   -1   -1
R> [13,]   -1   -1    1
R> [14,]   -1   -1    1
R> [15,]   -1   -1   -1
R> [16,]   -1   -1   -1
R> [17,]   -1   -1   -1
R> [18,]   -1   -1   -1
  1. E-step: Estimate with the GPCFA-LI model (E-step) by setting LD=FALSE. Only a few loadings need to be specified in Q (e.g., 2 per factor). A longer chain is recommended for more accurate and stable estimation.
# E-step fit: LD = FALSE gives the GPCFA-LI model. cati = -1 marks all items as
# categorical, and burn / iter set the burn-in and sampling lengths (see ?pcfa).
m0 <- pcfa(
  dat = dat,
  Q = Q,
  LD = FALSE,
  cati = -1,
  burn = 4000,
  iter = 4000,
  verbose = TRUE
)

# Basic information about the fit
summary(m0)

# Significant loadings, laid out in pattern / Q-matrix format
summary(m0, what = "qlambda")

# Factorial eigenvalues
summary(m0, what = "eigen")

# Thresholds of the categorical items
summary(m0, what = "thd")

# Plots of the factorial eigenvalues
plot_eigen(m0) # trace
plot_eigen(m0, what = "density") # density
plot_eigen(m0, what = "APSR") # adjusted PSRF (potential scale reduction factor)
  1. C-step: Reconfigure the Q matrix for the C-step with one specified loading per item, based on the results from the E-step. Estimate with the GPCFA model by setting LD=TRUE (the default). A longer chain is recommended for more accurate and stable estimation.
# C-step: rebuild Q with exactly one specified loading (1) per item, chosen from
# the E-step results; every other loading stays unspecified (-1).
Q <- matrix(-1, nrow = J, ncol = K)
tmp <- summary(m0, what = "qlambda")
# Choose each item's dominant factor by absolute size, so that a negative
# primary loading is not passed over in favour of a zero entry.
cind <- apply(abs(tmp), 1, which.max)
Q[cbind(seq_len(J), cind)] <- 1
# alternatively
# Q[1:6, 1] <- Q[7:12, 2] <- Q[13:18, 3] <- 1 # 1 for specified items

# LD is not passed, so pcfa() uses its default (LD = TRUE): the GPCFA model.
m1 <- pcfa(
  dat = dat,
  Q = Q,
  cati = -1,
  burn = 4000,
  iter = 4000,
  verbose = TRUE
)
summary(m1)
summary(m1, what = "qlambda")
summary(m1, what = "offpsx") # summarize significant LD terms
summary(m1, what = "eigen")
summary(m1, what = "thd")

# Plots of the factorial eigenvalues
plot_eigen(m1) # trace
plot_eigen(m1, what = "density") # density
plot_eigen(m1, what = "APSR") # adjusted PSRF (potential scale reduction factor)
  1. CFA-LD: One can also configure the Q matrix for a CCFA model with local dependence (i.e. without any unspecified loading) based on results from the C-step.
# CFA-LD: derive a fully specified Q from the C-step loading pattern. Every
# non-zero entry becomes 1; zero entries are left as 0.
Q <- summary(m1, what = "qlambda")
Q[Q != 0] <- 1
Q

m2 <- pcfa(
  dat = dat,
  Q = Q,
  cati = -1,
  burn = 4000,
  iter = 4000,
  verbose = TRUE
)
summary(m2)
summary(m2, what = "qlambda")
summary(m2, what = "offpsx")
summary(m2, what = "eigen")
summary(m2, what = "thd")

# Eigenvalue traces are excellent without regularization of the loadings
plot_eigen(m2)

Categorical Data with Missingness and Local Dependence:

  1. Load the data, loading pattern (qlam), and LD terms, and set up the design matrix Q.
# Switch to the sim18ccfa41 example object, which also stores the LD terms.
dat <- sim18ccfa41[["dat"]]
summary(dat) # 10% missingness at random
J <- ncol(dat) # number of items
K <- 3 # number of factors
sim18ccfa41$qlam
sim18ccfa41$LD # effect size = .3

# Design matrix for the E-step: all loadings unspecified (-1), except two
# specified items (1) per factor.
Q <- matrix(-1, nrow = J, ncol = K)
Q[13:14, 3] <- 1
Q[7:8, 2] <- 1
Q[1:2, 1] <- 1
Q
  1. E-step: Estimate with the GPCFA-LI model (E-step) by setting LD=FALSE. Only a few loadings need to be specified in Q (e.g., 2 per factor). Some loading estimates are biased because the LD is ignored, and so are the eigenvalues.
# E-step fit with local dependence switched off (LD = FALSE); cati = -1 marks
# all items as categorical (see ?pcfa).
m0 <- pcfa(
  dat = dat,
  Q = Q,
  LD = FALSE,
  cati = -1,
  burn = 4000,
  iter = 4000,
  verbose = TRUE
)
summary(m0)
summary(m0, what = "qlambda")
summary(m0, what = "eigen")
summary(m0, what = "thd")

# Plots of the factorial eigenvalues
plot_eigen(m0) # trace
plot_eigen(m0, what = "APSR")
  1. C-step: Reconfigure the Q matrix for the C-step with one specified loading per item, based on the results from the E-step. Estimate with the GPCFA model by setting LD=TRUE (the default). The estimates are more accurate, and the LD terms can be largely recovered.
# C-step: rebuild Q with exactly one specified loading (1) per item, chosen from
# the E-step results; every other loading stays unspecified (-1).
Q <- matrix(-1, nrow = J, ncol = K)
tmp <- summary(m0, what = "qlambda")
# Choose each item's dominant factor by absolute size, so that a negative
# primary loading is not passed over in favour of a zero entry.
cind <- apply(abs(tmp), 1, which.max)
Q[cbind(seq_len(J), cind)] <- 1
Q

# LD is not passed, so pcfa() uses its default (LD = TRUE): the GPCFA model.
m1 <- pcfa(
  dat = dat,
  Q = Q,
  cati = -1,
  burn = 4000,
  iter = 4000,
  verbose = TRUE
)
summary(m1)
summary(m1, what = "qlambda")
summary(m1, what = "eigen")
summary(m1, what = "offpsx") # significant LD terms
summary(m1, what = "thd")
  1. CFA-LD: Configure the Q matrix for a CCFA model with local dependence (i.e. without any unspecified loading) based on results from the C-step. Results are similar to, but better than, those from the C-step.
# CFA-LD: derive a fully specified Q from the C-step loading pattern. Every
# non-zero entry becomes 1; zero entries are left as 0.
Q <- summary(m1, what = "qlambda")
Q[Q != 0] <- 1
Q

m2 <- pcfa(
  dat = dat,
  Q = Q,
  cati = -1,
  burn = 4000,
  iter = 4000,
  verbose = TRUE
)
summary(m2)
summary(m2, what = "qlambda")
summary(m2, what = "eigen")
summary(m2, what = "offpsx")
summary(m2, what = "thd")

Mixed-type Data with Missingness and Local Dependence:

  1. Load the data, loading pattern (qlam), and LD terms, and set up the design matrix Q.
# Switch to the mixed-type sim18mcfa41 example object.
dat <- sim18mcfa41[["dat"]]
summary(dat) # 10% missingness at random
J <- ncol(dat) # number of items
K <- 3 # number of factors
sim18mcfa41$qlam
sim18mcfa41$LD # effect size = .3

# Design matrix for the E-step: all loadings unspecified (-1), except two
# specified items (1) per factor.
Q <- matrix(-1, nrow = J, ncol = K)
Q[13:14, 3] <- 1
Q[7:8, 2] <- 1
Q[1:2, 1] <- 1
Q
  1. E-step: Estimate with the GPCFA-LI model (E-step) by setting LD=FALSE. Only a few loadings need to be specified in Q (e.g., 2 per factor). The first 12 items are categorical and need to be specified with cati.
# E-step fit with local dependence switched off (LD = FALSE). Only the first 12
# items are categorical, so their positions are passed through cati.
m0 <- pcfa(
  dat = dat,
  Q = Q,
  LD = FALSE,
  cati = 1:12,
  burn = 4000,
  iter = 4000,
  verbose = TRUE
)
summary(m0)
summary(m0, what = "qlambda")
summary(m0, what = "eigen")
summary(m0, what = "thd") # only for 12 items

# Plots of the factorial eigenvalues
plot_eigen(m0) # trace
plot_eigen(m0, what = "density")
plot_eigen(m0, what = "APSR")
  1. C-step: Reconfigure the Q matrix for the C-step with one specified loading per item, based on the results from the E-step. Estimate with the GPCFA model by setting LD=TRUE (the default). The estimates are more accurate, and the LD terms can be largely recovered.
# C-step: rebuild Q with exactly one specified loading (1) per item, chosen from
# the E-step results; every other loading stays unspecified (-1).
Q <- matrix(-1, nrow = J, ncol = K)
tmp <- summary(m0, what = "qlambda")
# Choose each item's dominant factor by absolute size, so that a negative
# primary loading is not passed over in favour of a zero entry.
cind <- apply(abs(tmp), 1, which.max)
Q[cbind(seq_len(J), cind)] <- 1
Q

# LD is not passed, so pcfa() uses its default (LD = TRUE): the GPCFA model.
# Items 1 to 12 are the categorical ones.
m1 <- pcfa(
  dat = dat,
  Q = Q,
  cati = 1:12,
  burn = 4000,
  iter = 4000,
  verbose = TRUE
)
summary(m1)
summary(m1, what = "qlambda")
summary(m1, what = "eigen")
summary(m1, what = "offpsx") # significant LD terms
summary(m1, what = "thd")
  1. CFA-LD: Configure the Q matrix for a mix of CFA and CCFA models with local dependence (i.e. without any unspecified loading) based on results from the C-step. Results are similar to, but better than, those from the C-step.
# CFA-LD: derive a fully specified Q from the C-step loading pattern. Every
# non-zero entry becomes 1; zero entries are left as 0.
Q <- summary(m1, what = "qlambda")
Q[Q != 0] <- 1
Q

# Items 1 to 12 are the categorical ones.
m2 <- pcfa(
  dat = dat,
  Q = Q,
  cati = 1:12,
  burn = 4000,
  iter = 4000,
  verbose = TRUE
)
summary(m2)
summary(m2, what = "qlambda")
summary(m2, what = "eigen")
summary(m2, what = "offpsx")
summary(m2, what = "thd")