# Title: Associative tests in R: Correlation to multivariate regression
# Author: Jason C. Rodin
# Affiliation:
#   Department of Psychology, Sociology, and Social Work
#   West Texas A&M University
#
# 'THANK YOU to the following for supporting student travel and this event:
#   College of Education and Social Sciences
#   Department of Psychology, Sociology, and Social Work
#   Dr. Lisa Garza'
#
# Overview: imports "gradedata_2020.csv", fits a linear model predicting
# FinalExam from minStudy and SAT, checks the model assumptions, draws
# scatterplots, and exports correlation and regression tables in near-APA
# format. The .csv is read from the current working directory, and the
# exported .doc files are given as bare file names (no directory component).

# Load the necessary packages ----
library(apaTables)  # Useful for near-APA output
library(broom)      # Useful for converting statistical objects into Tidy Tibbles
library(corrplot)   # Creates visual plots of correlation matrices
library(dplyr)      # Useful for data manipulation/wrangling
library(ggfortify)  # Allows ggplot2 to handle some popular packages
library(ggplot2)    # Useful for creating graphs
library(ggplotgui)  # Allows GUI interface with ggplot
library(gvlma)      # Useful for global validation of regression assumptions
library(readr)      # Used to import the .csv

# Import the .csv titled "gradedata_2020" ----
gradedata_2020 <- as_tibble(read_csv("gradedata_2020.csv"))

# Next, we are going to practice selecting only
# the variables that we will need for this example.
gradedata_tibble <- select(gradedata_2020, minStudy, SAT, FinalExam)

# Helpful functions for getting started ----
# summary() provides a descriptive summary for each element of the tibble
summary(gradedata_tibble)

# View() displays the data in a form similar to many spreadsheets.
# It needs an interactive session, so it is skipped when the script is
# run non-interactively (e.g. with Rscript).
if (interactive()) {
  View(gradedata_tibble)
}

# head() previews the first 6 observations by default
head(gradedata_tibble)

# tail() previews the last 6 observations by default
tail(gradedata_tibble)

# class() provides the class of our tibble
class(gradedata_tibble)

# str() lets us look at the structure of our data
str(gradedata_tibble)

# Create a linear model ----
regression_model <- lm(
  formula = FinalExam ~ minStudy + SAT,
  data = gradedata_tibble
)

# Assumption testing ----
gvlma.lm(regression_model)

# Check plots (Make sure margins are wide enough for this step)
plot(gvlma(regression_model), onepage = FALSE)
# We could also have used autoplot(regression_model) + theme_classic()

# Check the mean of the residuals
round(mean(regression_model$residuals), 3)

# Check the histogram of the residuals
hist(regression_model$residuals)

# To augment the regression_model to a Tidy Tibble.
# round() is applied to the whole table, which requires every column that
# augment() returns to be numeric.
tidy_regression_metrics <- round(augment(regression_model), 3)

# To view a summary of the tidy_regression_metrics
summary(tidy_regression_metrics)

# Creating scatterplots ----
# ggplot_shiny() opens a point-and-click GUI for building plots; like View(),
# it is only launched in an interactive session.
if (interactive()) {
  ggplot_shiny(gradedata_tibble)
}

# Scatterplot 1
scatterplot1 <- ggplot(
  gradedata_tibble,
  aes(x = minStudy, y = FinalExam)
) +
  geom_point() +
  labs(x = 'Minutes spent studying', y = 'Final exam grade') +
  ggtitle('Scatterplot of minutes studying and final exam grade') +
  theme_classic() +
  geom_smooth(method = 'lm')
scatterplot1

# Scatterplot 2
scatterplot2 <- ggplot(
  gradedata_tibble,
  aes(x = SAT, y = FinalExam)
) +
  geom_point() +
  labs(x = 'SAT', y = 'Final exam grade') +
  ggtitle('Scatterplot of SAT score and final exam grade') +
  theme_classic() +
  geom_smooth(method = 'lm')
scatterplot2

# Correlation coefficients ----
# To create a correlation coefficient matrix
correlation_coefficients <-
  round(cor(gradedata_tibble, method = "pearson"), 3)
correlation_coefficients

# To export those results in near-APA
apa.cor.table(
  data = gradedata_tibble,
  filename = "Correlation Matrix Example.doc"
)

# To visualize the correlation matrix
corrplot(correlation_coefficients, method = "circle", addCoef.col = TRUE)

# Regression coefficients ----
# summary of the regression_model
summary(regression_model)

# To examine the regression coefficients and store
# the results as the object "regression_coefficients"
regression_coefficients <- tidy(regression_model)
regression_coefficients

# To export the results in near-APA
apa.reg.table(
  data = regression_model,
  filename = "Multiple Regression Example.doc",
  prop.var.conf.level = .95
)

# Alternatively, we could bootstrap the results
apa.reg.boot.table(
  data = regression_model,
  filename = "Multiple Regression Bootstrapped Example.doc",
  number.samples = 1000
)

# Useful functions for regression ----
# model regression coefficients
coefficients(regression_model)

# CIs for model parameters
confint(regression_model, level = 0.95)

# predicted values
fitted(regression_model)

# residuals
residuals(regression_model)

# anova table
anova(regression_model)

# covariance matrix for model parameters
vcov(regression_model)

# regression diagnostics
influence(regression_model)