#Title: R-ticulate - Intro to R----
#Author: Danney Rasco, PhD
#Affiliation:
# Department of Psychology, Sociology, and Social Work
# West Texas A&M University
#
'THANK YOU to the following for supporting student travel and this event: College of Education and Social Sciences Department of Psychology, Sociology, and Social Work Dr. Lisa Garza'
#
#PRE-WORKSHOP PACKAGE INSTALL
#Select (highlight with cursor) every line of code from here
#down to the "STOP SELECTION" comment below
#Press Ctrl and Enter at the same time once selected

# Use the cloud CRAN mirror for every install in this session
options(repos = c(CRAN = "https://cloud.r-project.org"))

# PacMan: install whichever of the requested packages are missing,
# then attach all of them.
#   pkg     character vector of package names
#   returns the result of sapply() over library() for each name
PacMan <- function(pkg) {
  already_installed <- installed.packages()[, "Package"]
  to_install <- pkg[!(pkg %in% already_installed)]
  if (length(to_install)) {
    install.packages(to_install, dependencies = TRUE)
  }
  sapply(pkg, library, character.only = TRUE)
}

# Every package used during the workshop
pkgs <- c(
  "apa", "apaTables", "broom", "car", "corrplot", "DescTools",
  "dplyr", "ez", "foreign", "ggfortify", "ggplot2", "ggplotgui",
  "gvlma", "lavaan", "MVN", "nortest", "olsrr", "purrr",
  "readr", "rgl", "scatterplot3d", "semPlot", "semTools", "tidyr"
)
PacMan(pkgs)
#
#STOP SELECTION
#Once you hit Ctrl+Enter, you should see some code running in the Console window
#It will likely take a few minutes to install all packages
# It may take several minutes depending on the speed of connection
#
#Do not worry if the code is red.
#Red is the default color for downloads.
#As long as you see "downloaded XX KB"
# or "downloaded XX MB" with a number for XX
# it worked correctly.
#
#We will cover the remaining code during the session.
#We look forward to meeting you soon.
#
#
#Thank you----
#Department of Psychology, Sociology, and Social Work
# Dr. Lisa Garza
#
#INTRO to Rstudio Windows----
#Script, Console, Environment, Files...
#
#CREATING A PROJECT FOLDER----
#1. Create folder on desktop or other location
#2. Label folder
#3. In R, select File>New Project
#4. Select New Directory (unless you have an existing directory from previous work in R)
#5. Select New Project
#6. Select Browse
#7.
#   Find the location of the folder you previously created
#8. Enter the directory name (e.g., IntroToR)
#9. Hit Create Project
#
#Working with a SCRIPT----
#Open Script
# In R, select File>New File>R Script (or Ctrl+Shift+N)
#
#Save Script
# File>Save As
#
#Few Basics: Comments, Foldable Sections, Running Code, etc.----
#Create comments to make code easier to read (or add reminders)
# Start the comment with a "#" symbol
#
#Create foldable sections
# Add four "-" marks (or more) on a comment line to create a foldable section
# Some portions of code (e.g., functions) are automatically foldable
#
#Run a line (or lines) of code
# To run a line of code, place your cursor on the line and hit "ctrl" and "enter"
# To run lines of code, highlight the section of code and hit ctrl+enter
#
#Download (i.e., INSTALL) package
# General form: install.packages("[INSERT PACKAGE]", dependencies=TRUE)
install.packages("apa", dependencies = TRUE)
#
#Attach package from library
library(apa)
#
#Attach multiple packages from library
pkgs <- c(
  "apaTables", "broom", "corrplot", "DescTools", "dplyr", "foreign",
  "ggfortify", "ggplot2", "ggplotgui", "gvlma", "purrr"
)
lapply(pkgs, library, character.only = TRUE)
#
#Learn more about a function
# Learn more about a function by using "help([insert function])" or "??[function]"
#
#Clear console
# ctrl + l
#
#Working directory----
#Get working directory (i.e., what location are you currently using)
getwd()
#
#Change (i.e., SET) working directory
# Note: You can find location by right clicking folder and selecting properties
# If you copy and paste location, flip slashes to forward
setwd("C:/Users/Danney/Desktop/IntroToR")
# Relative file names are now looked up inside the working directory
read.csv("DataExample.csv")
#
#OBJECT-ORIENTED CODING----
#INTRODUCE TERMINOLOGY: Object, Class, Package, Function, Library
#   Objects are assigned pieces of information (a score, vector, matrix, data frame)
#   Objects can be of different types or CLASSES that have certain attributes
#   Packages are created to perform a number of jobs or FUNCTIONS
#   Functions are typically performing a limited number of actions on the objects
#   Packages are stored in the library - Some automatically, some you add
#
#Assign a value (10) to an object (x)
# NOTE: In Rstudio you can create "<-" using "alt" and "-" together
x <- 10
#Check the Environment window; Should see new object (x) added
#
#Call the object in the console
x
#Add the value 20 to the object x
x + 20
#Assign sum of x+20 to a new object (z)
z <- (x + 20)
#Call object z
z
#Fundamentals of data use----
#Assign vector of numbers to object (v1)
v1 <- c(
  100, 108, 109, 88, 79, 99, 95, 106, 103, 118, 115, 121,
  130, 145, 132, 82, 93, 84, 82, 117, 98, 97, 96, 148
)
#Convert vector to a data frame and call object
data1 <- as.data.frame(v1)
#
#View data in console
v1
data1
#
#Open full data frame
View(data1)
#
#Create a second variable for GPA
v2 <- c(
  3.82, 3.60, 3.91, 2.58, 1.92, 2.02, 1.98, 3.20, 3.21, 3.80, 3.83, 3.90,
  3.87, 3.20, 4.00, 2.68, 2.87, 2.00, 2.50, 1.80, 2.92, 3.20, 3.27, 3.87
)
#
#Combine two vectors
data2 <- data.frame(v1, v2)
data2
# Note: data.frame can create a data frame; as.data.frame coerces an existing object to a data frame
#
#Example of using data.frame
# Give every row its own ID. seq_len(nrow()) keeps working if the data grow.
data2$ID <- seq_len(nrow(data2))
# ID must have one value per score: a shorter vector whose length divides
# the number of rows is silently recycled by data.frame(), which would
# repeat the same few IDs down the column.
data3 <- data.frame(ID = seq_along(v1), IQ = v1, GPA = v2)
View(data3)
#
#Add column labels for data3
names(data3) <- c("ID", "IQ", "GPA")
View(data3)
head(data3, n = 6)
head(data3, n = 20)
#
#Save data as a CSV file
# row.names=TRUE writes the row numbers as an extra first column
write.csv(data2, file = "IQ_GPA_example2.csv", row.names = TRUE)
write.csv(data3, file = "IQ_GPA_example3.csv", row.names = FALSE)
#
#Write data to other file types (e.g., Stata)
library(foreign)
?write.dta
write.dta(data3, file = "IQ_GPA_example4.dta")
#
#Read data from CSV file
IQdata <- read.csv("IQ_GPA_example3.csv")
head(IQdata)
#
#EXAMPLES: Read data from other files
# "FileName.sav" / "FileName.dta" look like placeholders - substitute a real file name
DataSPSS <- read.spss("FileName.sav", to.data.frame = TRUE)
??read.spss
DataStata <- read.dta("FileName.dta")
??read.dta
#
#Summarizing and Visualizing Single Continuous Data - Descriptive Stats-------
#Summary output for a single
# variable in data frame
summary(IQdata$IQ)
#
#Summary output for entire data frame
summary(IQdata)
#
#Look at distribution of GPA scores
hist(IQdata$GPA, main = "Histogram of GPA", xlab = "GPA", ylab = "n")
#
#Calculate mean for IQ
IQmean <- mean(IQdata$IQ)
IQmean
#
#Estimate variance for IQ
IQvar <- var(IQdata$IQ)
IQvar
#
#Estimate the standard deviation for IQ
IQsd <- sd(IQdata$IQ)
IQsd
# Same value: the standard deviation is the square root of the variance
sqrt(IQvar)
#
#Report IQsd rounded to 2 decimal places
round(IQsd, 2)
#
#Create z score
# Add a placeholder column first so you can see it appear in the data frame
IQdata$IQz <- 0
head(IQdata)
IQdata$IQz <- ((IQdata$IQ - IQmean) / IQsd)
View(IQdata)
# z scores should have mean 0 and standard deviation 1
round(mean(IQdata$IQz), 4)
round(sd(IQdata$IQz), 4)
#skewness and kurtosis
library(DescTools)
Skew(IQdata$IQ)
Kurt(IQdata$IQ)
#
#Recoding continuous to categorical-----------------------------------
#Convert IQ to dichotomous variable: Above 100, Equal or below 100
#1. Add variable to data frame
# Start with missing values so a row with a missing IQ stays NA instead of
# turning into its own "0" category
IQdata$IQcat <- NA_character_
#
#2. Create values for new variable using continuous variable
# The two conditions split at exactly 100 and do not overlap, so a
# non-integer score such as 100.5 falls in only one group
IQdata$IQcat[IQdata$IQ <= 100] <- "Average or below"
IQdata$IQcat[IQdata$IQ > 100] <- "Above average"
summary(IQdata$IQcat)
#3.
#   Let R know variable is categorical (i.e., factor) variable
IQdata$IQcat <- factor(IQdata$IQcat)
summary(IQdata$IQcat)
#
#Look at updated data with categorical variable
View(IQdata)
#
#Summarizing and Visualizing Single Categorical Variable --------------
#Create frequency table for IQcat
IQfreq <- table(IQdata$IQcat)
IQfreq
#
#Create frequency table as data frame using wrapping
as.data.frame(table(IQdata$IQcat, dnn = "IQ category"))
#
#Create bar graph for IQcat using IQfreq
# names.arg must follow the order of the factor levels (alphabetical here)
barplot(
  IQfreq,
  main = "Bar Graph: IQ",
  xlab = "IQ",
  ylab = "Frequency (n)",
  names.arg = c("Above Average", "Average or Below"),
  border = "blue"
)
#
#Visualizing Continuous with Categorical-------------------------------
#
#Get error info - The easy way
library(dplyr)
# One row per IQ category; se is computed from the sd and N columns
# created earlier in the same summarise() call
SEdata <- IQdata %>%
  group_by(IQcat) %>%
  summarise(N = n(), mean = mean(GPA), sd = sd(GPA), se = sd / sqrt(N))
View(SEdata)
#
#Create plot with 95% CI around mean
library(ggplot2)
plot <- ggplot(data = SEdata, aes(x = IQcat, y = mean)) +
  geom_bar(stat = "identity")
plot
# 1.96 standard errors either side of the mean gives the 95% interval
plot + geom_errorbar(aes(ymin = mean - (1.96 * se), ymax = mean + (1.96 * se)))
#
#Make it pretty
plot2 <- ggplot(data = SEdata, aes(x = IQcat, y = mean, fill = IQcat, width = .5)) +
  geom_bar(stat = "identity", color = "black") +
  geom_errorbar(aes(ymin = mean - (1.96 * se), ymax = mean + (1.96 * se)), width = .2) +
  labs(title = "Average GPA by IQ", x = "IQ Category", y = "GPA (M)")
plot2
plot2 + scale_fill_manual(values = c("#000099", "#990000")) + theme_classic()
#Use WT colors
plot2 + scale_fill_manual(values = c("#660000", "#990000")) + theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5))
plot3 <- ggplot(data = SEdata, aes(x = IQcat, y = mean, fill = IQcat, width = .5)) +
  geom_bar(stat = "identity", color = "black") +
  geom_errorbar(aes(ymin = mean - (1.96 * se), ymax = mean + (1.96 * se)), width = .2) +
  labs(title = "Average GPA by IQ", x = "IQ Category", y = "GPA (M)") +
  scale_fill_manual(values = c("#660000", "#990000")) +
  theme_minimal() +
  theme(plot.title = element_text(size = 22, hjust = 0.5))
plot3
#
#Create histogram by group
# position='identity' overlays the groups; alpha keeps both visible
histBygrp <- ggplot(data = IQdata, aes(x = GPA, fill = IQcat)) +
  geom_histogram(position = "identity", alpha = 0.8, binwidth = 1) +
  scale_fill_brewer(palette = "Dark2") +
  theme_classic()
histBygrp
densityBygrp <- ggplot(data = IQdata, aes(x = GPA, fill = IQcat)) +
  geom_density(position = "identity", alpha = 0.8, adjust = 0.8) +
  scale_fill_manual(values = c("#660000", "#990000")) +
  theme_classic()
densityBygrp
#Data Analysis and Functions in R----------------------------------------------------
#Creating a Function for One-sample z test
# ztest: two-tailed one-sample z test of a sample mean against a population
#   x      numeric vector of sample scores
#   mu     population mean under the null hypothesis (single number)
#   sigma  known population standard deviation (single positive number)
#   returns a data frame with columns "statistic" and "value" holding the
#           sample size, sample mean, z (3 decimals) and two-tailed p (3 decimals)
#   stops with an error if x is empty or non-numeric, or if mu / sigma are
#   not single numbers with sigma > 0
ztest <- function(x, mu, sigma) {
  stopifnot(
    is.numeric(x), length(x) > 0,
    is.numeric(mu), length(mu) == 1,
    is.numeric(sigma), length(sigma) == 1, sigma > 0
  )
  Mx <- mean(x)
  n <- length(x)
  # Keep the unrounded statistic so p is not computed from a rounded z
  z_exact <- (Mx - mu) / (sigma / sqrt(n))
  z <- round(z_exact, 3)
  p <- round(2 * pnorm(-abs(z_exact), mean = 0, sd = 1), 3)
  statistic <- c("Sample Size (n)", "Sample Mean", "z obtained", "p (two-tailed)")
  value <- c(n, Mx, z, p)
  # cbind() of labels and numbers gives a character matrix, so both
  # columns of the returned data frame are text
  output <- cbind(statistic, value)
  as.data.frame(output)
}
#Using Function for One-sample z test
#Compare IQ scores in IQdata to pop. (M=100, SD=15)
ztest(IQdata$IQ, 100, 15)