# Title: Comparative tests in R: t Tests and ANOVAs # Author: Jason C. Rodin # Affiliation: # Department of Psychology, Sociology, and Social Work # West Texas A&M University # 'THANK YOU to the following for supporting student travel and this event: College of Education and Social Sciences Department of Psychology, Sociology, and Social Work Dr. Lisa Garza' # # Load the necessary packages ---- library(apa) # Useful for showing in-text near-APA library(apaTables) # Useful for near-APA output library(DescTools) # Useful for descriptive statistics and exploratory data analysis library(dplyr) # Useful for data manipulation/wrangling library(ggplot2) # Useful for creating graphs library(ggplotgui) # Allows GUI interface with ggplot library(readr) # Useful for importing csv files library(semTools) # Useful tools for structural equation modeling # Import the .csv titled "gradedata_2020" ---- gradedata_2020 <- as_tibble(read_csv("gradedata_2020.csv")) # Next, we are going to practice selecting only # the variables that we will need for this example. t_test_example <- select(gradedata_2020, gender, minStudy) # Calculate descriptive statistics split by gender ---- # Create a tibble that is split by "gender" Split_by_gender <- group_by(t_test_example, gender) # If you want to examine the key associated with each gender group_keys(Split_by_gender) # Calculate the descriptive statistics and store it # as "Descriptives_by_gender" Descriptives_by_gender <- summarize( Split_by_gender, mean = mean(minStudy), median = median(minStudy), sd = sd(minStudy), IQR = IQR(minStudy), min = min(minStudy), max = max(minStudy), n = n() ) Descriptives_by_gender # Remove "Split_by_gender" from working environment rm(Split_by_gender) # Assumption Testing ---- # Split the file into 2 separate tibbles split by gender men <- filter(t_test_example, gender == "men") women <- filter(t_test_example, gender == "women") # Check men$minStudy for evidence against normality # Calculate skew and round it to 3 decimal places round(skew(men$minStudy), 3) # Calculate kurtosis and round it to 3 decimal places round(kurtosis(men$minStudy), 3) # Run the Shapiro-Wilks normality test shapiro.test(men$minStudy) # Check the histogram hist(men$minStudy) # Create a boxplot and identify any outliers in male data Male_outliers <- boxplot(men[2])$out title("Boxplot of minutes studying for male participants") # Identifies the position of any outliers Male_outlier_position <- which(men$minStudy %in% Male_outliers) # To view outliers with their position in the data Male_outlier_table <- cbind.data.frame(Male_outliers, Male_outlier_position) # Please note: # This has 0 obs. (or observations) in this instance # because there are no outliers. # Check women$minStudy for evidence against normality # Calculate skew and round it to 3 decimal places round((women$minStudy), 3) # Calculate kurtosis and round it to 3 decimal places (kurtosis(women$minStudy), 3) # Run the Shapiro-Wilks normality test shapiro.test(women$minStudy) # Check the histogram (women$minStudy) # Identify outliers in Female data Female_outliers = boxplot(women[2])$out title("Boxplot of minutes studing for female participants") # Identifies the position of any outliers Female_outlier_position <- which(women$minStudy %in% Female_outliers) # To view outliers with their position in the data Female_outlier_table <- cbind.data.frame(Female_outliers, Female_outlier_position) # Conduct Levene's test of homogeneity of variance LeveneTest(minStudy~as.factor(gender), data = t_test_example) # Remove both men & women when finished rm(men) rm(women) rm(Male_outlier_table) rm(Female_outlier_table) rm(Male_outlier_position) rm(Female_outlier_position) rm(Male_outliers) rm(Female_outliers) # Conduct an independent samples t-test ---- t_test_results <- t_test( minStudy~gender, data = t_test_example, alternative = "two.sided", paired = FALSE, var.equal = TRUE, conf.level = 0.95 ) # Show results in APA format t_apa(t_test_results) # Create a graph ---- ggplot_shiny(t_test_example) # The code below will generate the graph: density_plot_by_gender <- ggplot(t_test_example, aes(x = minStudy, fill = gender)) + geom_density(position = 'identity', alpha = 0.8, adjust = 1.5) + facet_grid( gender ~ . )+ scale_fill_brewer(palette = 'Pastel2') + theme_classic() + theme( legend.position = 'none' ) density_plot_by_gender t_test_results <- t_test(minStudy~gender, data = t_test_example, var.equal = TRUE, conf.level = 0.95) t_test_results cohens_d(t_test_results, corr = "none") ##### ANOVAs in R ##### # Select the 2 variables we will be using for this example: # "schoolType" and "SAT" one_way_anova_example <- select(gradedata_2020, schoolType, SAT) # Calculate descriptive statistics split by "schoolType" ---- # Create a tibble that is split by "schoolType" Split_by_schoolType <- group_by(one_way_anova_example, schoolType) # Create a descriptive summary split by "schoolType" Descriptives_by_schoolType <- (Split_by_schoolType, mean = mean(SAT), sd = sd(SAT), median = median(SAT), iqr = IQR(SAT), min = min(SAT), max = max(SAT), n = n(), se = sd(SAT)/sqrt(n()) ) Descriptives_by_schoolType # Remove "Split_by_schoolType" from working environment rm(Split_by_schoolType) # Create a word document to show M & SD in near-APA apa.1way.table( iv=schoolType, dv=SAT, data = one_way_anova_example, show.conf.interval = TRUE, filename = "One-way ANOVA Descriptive Statistics Example.doc" ) # Assumption Testing ---- # Split the file into 4 separate tibbles by "schoolType" home <- filter(one_way_anova_example, schoolType == "home") private <- (one_way_anova_example, schoolType == "private") public <- filter(one_way_anova_example, == "public") religious <- filter(one_way_anova_example, schoolType == "religious") # Check the home$SAT for evidence against normality # Calculate skew and round it to 3 decimal places round(skew(home$SAT), 3) # Calculate kurtosis and round it to 3 decimal places round(kurtosis(home$SAT), 3) # Run the Shapiro-Wilks normality test (home$SAT) # Check the histogram hist(home$SAT, main = "Histogram of SAT scores for participants from home schools", xlab = "SAT") # Create a boxplot and identify any outliers in male data home_outliers <- boxplot(home[2])$out title("Boxplot of SAT scores for participants from home schools") # Identifies the position of any outliers home_outlier_position <- which(home$SAT %in% home_outliers) # To view outliers with their position in the data home_outlier_table <- cbind.data.frame(home_outliers, home_outlier_position) # Check the private$SAT for evidence against normality # Calculate skew and round it to 3 decimal places round(skew(private$SAT), 3) # Calculate kurtosis and round it to 3 decimal places round((private$SAT), 3) # Run the Shapiro-Wilks normality test shapiro.test(private$SAT) # Check the histogram hist(private$SAT, main = "Histogram of SAT scores for participants from private schools", xlab = "SAT") # Create a boxplot and identify any outliers in male data private_outliers <- boxplot(private[2])$out title("Boxplot of SAT scores for participants from private schools") # Identifies the position of any outliers private_outlier_position <- which(private$SAT %in% private_outliers) # To view outliers with their position in the data private_outlier_table <- (private_outliers, private_outlier_position) # Check the public$SAT # for evidence against normality # Calculate skew and round it to 3 decimal places round(skew(public$SAT), 3) # Calculate kurtosis and round it to 3 decimal places round(kurtosis(public$SAT), 3) # Run the Shapiro-Wilks normality test shapiro.test(public$SAT) # Check the histogram (Notice I used \n in my main title to force a line break) hist(public$SAT, main = "Histogram of SAT scores for participants from public schools", xlab = "SAT") # Create a boxplot and identify any outliers in male data public_outliers <- boxplot(public[2])$out title("Boxplot of SAT scores for participants from public schools") # Identifies the position of any outliers public_outlier_position <- which(public$SAT %in% public_outliers) # To view outliers with their position in the data public_outlier_table <- cbind.data.frame(public_outliers, public_outlier_position) # Check the religious$SAT for evidence against normality # Calculate skew and round it to 3 decimal places round(skew(religious$SAT), 3) # Calculate kurtosis and round it to 3 decimal places round(kurtosis(religious$SAT), 3) # Run the Shapiro-Wilks normality test shapiro.test(religious$SAT) # Check the histogram hist(religious$SAT, main = "Histogram of SAT scores for participants from religious schools", xlab = "SAT") # Create a boxplot and identify any outliers in male data religious_outliers <- (religious[2])$out title("Boxplot of SAT scores for participants from religious schools") # Identifies the position of any outliers religious_outlier_position <- which(religious$SAT %in% religious_outliers) # To view outliers with their position in the data religious_outlier_table <- cbind.data.frame(religious_outliers, religious_outlier_position) # Conduct Levene's test of homogeneity of variance LeveneTest( SAT~as.factor(schoolType), data = one_way_anova_example) # Remove the objects when finished (optional) ---- rm(home) rm(private) rm(public) rm(religious) rm(home_outlier_position) rm(private_outlier_position) rm(public_outlier_position) rm(religious_outlier_position) rm(home_outlier_table) rm(private_outlier_table) rm(public_outlier_table) rm(religious_outlier_table) rm(home_outliers) rm(private_outliers) rm(public_outliers) rm(religious_outliers) # Conduct a one-way ANOVA ---- # Create a linear model of SAT distributed across "schoolType" anova.model <- lm( formula = SAT~schoolType, data = one_way_anova_example ) # Create a word document to show ANOVA summary table in APA apa.aov.table( lm_output=anova.model, filename = "One-way ANOVA Summary Table Example.doc", conf.level = 0.95, type = 3) # Conduct post hoc testing ---- # To conduct the post hoc testing PostHocTest( aov(anova.model), method = "hsd", conf.level = 0.95 ) # Create a graph ---- schoolType_histogram <- ggplot(Descriptives_by_schoolType, aes(x=schoolType,y=mean)) + geom_col(fill = "#660000") + geom_errorbar(aes(ymin = mean - 1.96*se, ymax = mean + 1.96*se)) # Add labels and theme schoolType_histogram + labs( title = "Barchart of SAT scores by type of school", y="Mean SAT with 95% C.I.", x = "Type of school" ) + theme_classic() # For a GUI interface to ggplot2 ggplot_shiny(one_way_anova_example)