# Title: Structural Equation Modeling in R # Author: Danney Rasco # Affiliation: # Department of Psychology, Sociology, and Social Work # West Texas A&M University # 'THANK YOU to the following for supporting student travel and this event: College of Education and Social Sciences Department of Psychology, Sociology, and Social Work Dr. Lisa Garza' # #Install and attach packages---- PacMan <- function(pkg){ new.pkg <- pkg[!(pkg %in% installed.packages()[, "Package"])] if (length(new.pkg)) install.packages(new.pkg, dependencies = TRUE) sapply(pkg, library, character.only = TRUE)} pkgs <- c("ggplot2", "scatterplot3d", "rgl", "car", "MVN", "olsrr", "lavaan", "dplyr", "semPlot") PacMan(pkgs) # #Set working directory if needed setwd("C:/Users/Danney/Desktop/SEM_inR") # #Import data---- SEM_DF <- read.csv("gradedata_2020_v3.csv", header=TRUE, na.strings=c("NA", " ", "")) names(SEM_DF) View(SEM_DF) # #Assumption testing ---- #Visually check linearity and homoscedasticity library(ggplot2) ggplot(data=SEM_DF, aes(x=SAT, y=FinalExam))+ geom_point()+ geom_smooth(method="lm", se=F) # ggplot(data=SEM_DF, aes(x=minStudy, y=FinalExam))+ geom_jitter(size=2, shape=1)+ geom_smooth(method="lm", se=F) # #Stationary 3D plot library(scatterplot3d) ScatEXAM_DF <- scatterplot3d( SEM_DF$FinalExam ~ SEM_DF$SAT + SEM_DF$minStudy, main="Final Exam \n by SAT and Minutes Studied", xlab="SAT", ylab="Min. Studied", zlab="Final Exam") ScatPlane <- lm(FinalExam~SAT+minStudy, data=SEM_DF) ScatEXAM_DF$plane3d(ScatPlane) # #Interactive 3D plot library(rgl) plot3d(FinalExam~SAT+minStudy, data=SEM_DF, xlab="SAT", ylab="Min. Studied", zlab="Final Exam") # #Scatterplot with Grouping Var. library(car) scatter3d(FinalExam~SAT+minStudy| schoolType, data=SEM_DF, xlab="SAT", ylab="Final Exam", zlab="minStudy") # ##Check Univariate and Multivariate Normality---- #Create data frame with variables of interest VarsInt <- subset.data.frame(SEM_DF, select=c(SAT, minStudy, FinalExam)) View(VarsInt) # library(MVN) MultNorm <- mvn(VarsInt, mvnTest = "royston", univariateTest = "Lillie", multivariatePlot = "qq", multivariateOutlierMethod = "quan") View(MultNorm$multivariateNormality) View(MultNorm$univariateNormality) View(MultNorm$Descriptives) # ##Evaluate Residuals---- #Run Linear Model MultReg <- lm( formula = FinalExam ~ SAT + minStudy, data = SEM_DF) # #Add predicted and residual values to data frame SEM_DF$FinExamPred <- predict(MultReg) SEM_DF$FinExamResid <- residuals(MultReg) View(SEM_DF) # #Create histogram of residuals ggplot(data=SEM_DF, aes(x=FinExamResid))+ geom_histogram(position="identity", alpha=0.8, binwidth=5, col="white")+ labs(title="Residuals Histogram")+ theme_classic() # #Check the mean of the residuals is 0 summary(SEM_DF$FinExamResid) # #Plot Predicted by Residual ggplot(data=SEM_DF, aes(x=FinExamPred, y=FinExamResid))+ geom_point()+ geom_smooth(method="lm", se=F) # ##Check Collinearity---- library(olsrr) CollTest <- ols_coll_diag(MultReg) View(CollTest$vif_t %>% mutate_if(is.numeric, round, 3)) # #Complete Regression---- MultReg <- lm(FinalExam ~ SAT + minStudy, SEM_DF) #Summary output for model summary.lm(MultReg) # #Summary for Bivariate model BivarReg <- lm(FinalExam~SAT, SEM_DF) anova(BivarReg) #Comparison between Bivariate and Multiple Regression #Does adding minStudy variable improve estimates for FinalExam? anova(BivarReg, MultReg) # #SEM: Mediation Model (Path Analysis)---- ##Create model---- names(SEM_DF) MedMod <- 'FinalExam~c*SAT minStudy~a*SAT FinalExam~b*minStudy indirect:=a*b total:=c+(a*b)' # MedMod2 <- 'FinalExam~c*SAT + b*minStudy minStudy~a*SAT indirect:=a*b total:=c+(a*b)' ##Test Model---- library(lavaan) MedModOut <- sem(MedMod, data=SEM_DF) MedModOut2 <- sem(MedMod2, data=SEM_DF) # #Display results summary(MedModOut, rsq=T, fit.measures=T) View(parameterEstimates(MedModOut, standardized = T)) # #Format results library(dplyr) View(parameterEstimates(MedModOut, standardized=T) %>% filter(op == ":=" | op == "~") %>% select(Path =label, b=est, StdErr=se, Z=z, "p value"=pvalue, Beta=std.all) %>% mutate_if(is.numeric, round, 3)) # View(parameterEstimates(MedModOut2, standardized=T) %>% filter(op == ":=" | op == "~") %>% select(Path =label, b=est, StdErr=se, Z=z, "p value"=pvalue, Beta=std.all) %>% mutate_if(is.numeric, round, 3)) # ##Visualize Model---- library(semPlot) semPaths(MedModOut) #Improve plot semPaths(MedModOut, nodeLabels = letters[1:3]) #(Left to Right; Predictor to Outcome) hor <- c(3, 2, 1) vert <- c(1, 2, 1) Place <- matrix(c(hor, vert), ncol=2) semLabs <- c("Final Exam", "Minutes \n Study", "SAT") semPaths(MedModOut, whatLabels="par", edge.label.cex = 1.5, edge.color = "black", residuals=FALSE, intercepts=FALSE, sizeMan=14, sizeMan2=7, layout=Place, nodeLabels=semLabs) # #Standardized Parameters semPaths(MedModOut, whatLabels="std", edge.label.cex = 1.5, edge.color = "black", residuals=FALSE, intercepts=FALSE, sizeMan=18, sizeMan2=8, label.cex=1.5, #Set label size to be consistent label.norm="OOOOOOOOOOO", layout=Place, nodeLabels=semLabs) # ##Bootstrapped estimates---- MedSEMboot <- sem(MedMod, data=SEM_DF, se="boot", bootstrap=2000) standardizedSolution(MedSEMboot) # # #Latent-Variable SEM---- names(SEM_DF) LV_MedMod <- 'ClassPerf=~Exam1+Exam2+FinalExam ClassPerf~c*SAT + b*minStudy minStudy~a*SAT indirect:=a*b total:=c+(a*b)' LV_MedSEM <- sem(LV_MedMod, data=SEM_DF) #Model Summary and Estimates summary(LV_MedSEM, rsq=TRUE, fit.measures=TRUE) View(parameterEstimates(LV_MedSEM)) #Latent Variable Summary (Measurement Model) View(parameterEstimates(LV_MedSEM, standardized=TRUE) %>% filter(op == "=~") %>% select("Lat Var"=lhs, Indicator=rhs, b=est, StdErr=se, Z=z, "p value"=pvalue, Beta=std.all)) #Path Model Summary View(parameterEstimates(LV_MedSEM, standardized=TRUE) %>% filter(op == ":=" | op == "~") %>% select("Path"=label, b=est, StdErr=se, Z=z, "p value"=pvalue, Beta=std.all)) # #Visualize Model semPaths(LV_MedSEM) #Improve plot semPaths(LV_MedSEM, nodeLabels = letters[1:6]) #(Left to Right; Predictor to Outcome) hor2 <- c(4, 4, 4, 2, 1, 3) vert2 <- c(.5, 0, -.5, 1, 0, 0) Place2 <- matrix(c(hor2, vert2), ncol=2) semLabs2 <- c("E1", "E2", "E3", "Minutes\nStudied", "SAT", "Class\nPerformance") semPaths(LV_MedSEM, whatLabels="std", edge.label.cex = 1.5, edge.color = "black", residuals=FALSE, intercepts=FALSE, sizeMan=14, sizeMan2=7, sizeLat=18,sizeLat2=8, label.cex=1.5, label.norm="OOOOOOOOOO", layout=Place2, nodeLabels=semLabs2) # #Position Indicators Under LV hor3 <- c(2.37, 3.02, 3.67, 2, 1, 3) vert3 <- c(-.5, -.5, -.5, 1, 0, 0) Place3 <- matrix(c(hor3, vert3), ncol=2) semPaths(LV_MedSEM, whatLabels="std", edge.label.cex = 1.3, edge.color = "black", residuals=FALSE, intercepts=FALSE, sizeMan=14, sizeMan2=7, sizeLat=18,sizeLat2=8, label.cex=.8, label.norm="OOOOOOO", layout=Place3, nodeLabels=semLabs2)