# ANOVA in R SCRIPT FILE.
# www.statstutor.ac.uk Community Project.
# Sofia Maria Karadimitriou and Ellen Marshall, Sheffield University.
# Reviewed by Jim Bull, University of Swansea.
# Dataset: diet.csv.
# Resource: ANOVA in R.
#
# Purpose: one-way ANOVA of weight lost by diet, with Tukey post hoc
# comparisons, assumption checks, and the Welch / Kruskal-Wallis alternatives.
# Every step refers to columns through the dietR data frame (or `data = dietR`)
# instead of attach(), so there are no masked or stale copies of the variables.

# Load the data ----

# Open the diet dataset, which is saved as a csv file, and call it dietR.
# If your file is saved as a standard Excel file, save it as a csv file first.
# You will need to change the path depending on where you have saved the file.
diet_path <- "D:\\diet.csv"
dietR <- read.csv(diet_path, header = TRUE)

# Define factors ----

# R assumes all numeric values are continuous, so tell it that 'gender' and
# 'Diet' are factors. The results are stored back in dietR so that every later
# model and plot sees the factor version.
# as.factor(variable) is a quick way of defining a factor.
dietR$Diet <- as.factor(dietR$Diet)
# factor(variable, levels = c(category numbers), labels = c(category names))
# also gives the categories names.
dietR$gender <- factor(dietR$gender, levels = c(0, 1),
                       labels = c("Female", "Male"))

# Weight lost ----

# Calculate the weight lost by each person (difference in weight before and
# after the diet) and add it to the dataset.
dietR$weightlost <- dietR$pre.weight - dietR$weight6weeks

# Summary statistics ----

# Calculate means and standard deviations for each diet.
# na.rm = TRUE drops missing values before each statistic is computed.
# The results are NOT called `mean` / `sd`, which would shadow the base
# functions of the same name.
mean_by_diet <- tapply(dietR$weightlost, dietR$Diet, mean, na.rm = TRUE)
sd_by_diet <- tapply(dietR$weightlost, dietR$Diet, sd, na.rm = TRUE)

# Combine in one table and give row names taken from the Diet levels (so the
# labels stay correct however many diets the data contain).
results1 <- cbind(mean = mean_by_diet, sd = sd_by_diet)
rownames(results1) <- paste("Diet", levels(dietR$Diet), sep = " ")

# Round all the summary statistics to 2 decimal places.
round(results1, 2)

# Graph a boxplot.
boxplot(weightlost ~ Diet, data = dietR,
        main = "Weight Lost by Diet", xlab = "Diet", ylab = "Weight Lost")

# One-way ANOVA ----

# To carry out a one-way ANOVA use aov(dependent ~ independent) and give the
# ANOVA model a name, e.g. anovaD.
anovaD <- aov(weightlost ~ Diet, data = dietR)

# To see the output use summary().
summary(anovaD)

# Tukey's post hoc adjustments for the pairwise comparisons.
TukeyHSD(anovaD)

# Check the assumptions ----

# Ask for the residuals (difference between each individual and their group
# mean) and save them in a separate object.
res <- residuals(anovaD)

# Produce a histogram of the residuals. These are raw residuals, so the plot
# is labelled accordingly.
hist(res, main = "Histogram of residuals", xlab = "Residuals")

######## Assumption of equal variances ########
# A simple check of equal variances is to compare standard deviations.
# If the largest is more than twice the smallest, the assumption has not been
# met. Levene's test can be used as an alternative.

# Levene's test for equality of variances is in the car package.
# If car is not installed, go to Packages --> Install package(s), select the
# UK (London) CRAN mirror, then look for the package 'car' and click
# (or run install.packages("car")). A lot of extra packages will download.
# requireNamespace() is used so that a missing package produces a message
# rather than stopping the rest of the script.
if (requireNamespace("car", quietly = TRUE)) {
  print(car::leveneTest(weightlost ~ Diet, data = dietR))
} else {
  message("Package 'car' is not installed; skipping car::leveneTest().")
}

# Note: some commands may not work in every RStudio setup.
# An alternative Levene's test is available in the lawstat package.
if (requireNamespace("lawstat", quietly = TRUE)) {
  print(lawstat::levene.test(dietR$weightlost, dietR$Diet))
} else {
  message("Package 'lawstat' is not installed; skipping lawstat::levene.test().")
}
# If neither works, compare the standard deviations in results1.

#--------------------------------------------
# If the assumptions are not met ----

# If Levene's test is significant, use the Welch test instead of ANOVA.
# oneway.test() is part of the stats package (no extra library needed) and
# does not assume equal variances by default.
oneway.test(weightlost ~ Diet, data = dietR)

# If the residuals are not normally distributed, use the Kruskal-Wallis test.
# See the Kruskal-Wallis in R sheet for more details.
kruskal.test(weightlost ~ Diet, data = dietR)