#    ANOVA in R SCRIPT FILE.
#    www.statstutor.ac.uk Community Project.
#    Sofia Maria Karadimitriou and Ellen Marshall, Sheffield University.
#    Reviewed by Jim Bull, University of Swansea.
#    Dataset: diet.csv.
#    Resource: ANOVA in R.

# Open the diet dataset, which is saved as a csv file, and call it dietR.
# If your file is saved as a standard Excel file, save it as a csv file first.
# You will need to change the path below depending on where you have saved the file.
dietR <- read.csv("D:\\diet.csv", header = TRUE)

# R assumes all numeric values are continuous, so tell it that 'gender' and
# 'Diet' are factors. The converted columns are stored back in dietR so that
# the data frame and the variables used in the analysis always agree.
# as.factor(variable) is a quick way of defining a factor.
dietR$Diet <- as.factor(dietR$Diet)
# The factor command can also give the categories names:
# factor(variable, levels = c(category numbers), labels = c(category names)).
dietR$gender <- factor(dietR$gender, levels = c(0, 1), labels = c("Female", "Male"))

# Calculate the weight lost by each person (difference in weight before and
# after the diet) and add it to the dataset.
dietR$weightlost <- dietR$pre.weight - dietR$weight6weeks

# Tell R we are using the diet dataset until further notice using attach.
# This means that 'Height' can be used instead of dietR$Height, and likewise
# 'weightlost', 'Diet' and 'gender' can be used without the dietR$ prefix.
# attach() is called once, after every column has been prepared: attaching
# takes a copy of the data frame, so columns changed afterwards would not be
# visible, and attaching twice leaves duplicate, masked copies on the search path.
attach(dietR)

# Calculate the mean and standard deviation of weight lost for each diet.
# na.rm = TRUE drops missing values before each statistic is calculated.
# The results get their own names so that R's mean() and sd() functions are
# not hidden behind variables of the same name.
diet_means <- tapply(weightlost, Diet, mean, na.rm = TRUE)
diet_sds <- tapply(weightlost, Diet, sd, na.rm = TRUE)
# Combine in one table with columns 'mean' and 'sd'.
results1 <- cbind(mean = diet_means, sd = diet_sds)
# Label each row with its diet; the labels come from the diet categories
# actually present in the data rather than a fixed 1:3.
rownames(results1) <- paste("Diet", names(diet_means), sep = " ")
# Round all the summary statistics to 2 decimal places.
round(results1, 2)
# Graph a boxplot of weight lost for each diet.
boxplot(weightlost ~ Diet, main = "Weight Lost by Diet", xlab = "Diet", ylab = "Weight Lost")

# One-way ANOVA: aov(dependent ~ independent).
# The fitted model is stored under a name (here anovaD) so it can be reused below.
anovaD <- aov(formula = weightlost ~ Diet)
# summary() displays the ANOVA table for the fitted model.
summary(object = anovaD)

# Tukey's post hoc adjustments for the pairwise comparisons between diets.
TukeyHSD(x = anovaD)

# To check the assumptions.

# Ask for the residuals (difference between each individual and their group mean)
# and save them in a separate object.
res <- residuals(anovaD)
# Produce a histogram of the residuals.
# These are the raw (unstandardised) residuals, so the plot is labelled
# accordingly; use rstandard(anovaD) instead if standardised residuals are wanted.
hist(res, main = "Histogram of residuals", xlab = "Residuals")

######## Assumption of equal variances #####.
# A simple check of equal variances is to compare the standard deviations:
# if the largest is more than twice the smallest, the assumption has not been met.
# Levene's test can be used as an alternative.
# Levene's test for equality of variances is in the car package, so load that library.
library("car")
# If this command does not work, go to Packages --> Install package(s) and
# select the UK (London) CRAN mirror. Then look for the package 'car' and click.
# A lot of extra packages will download. Then try library(car) again.
# Carry out Levene's test.
leveneTest(weightlost ~ Diet)

# Note: in RStudio this command may not always work.
# An alternative is available in the lawstat package.

library("lawstat")
levene.test(y = weightlost, group = Diet)
# If neither works, compare the standard deviations.
#--------------------------------------------.
# If the assumptions are not met.
# If Levene's test is significant, use the Welch test instead of ANOVA.
# oneway.test() belongs to R's built-in stats package, so no extra library
# needs to be loaded. var.equal = FALSE (the default) requests the Welch
# version, which does not assume equal variances.
oneway.test(weightlost ~ Diet, var.equal = FALSE)

# If the residuals are not normally distributed, use the Kruskal-Wallis test.
# See the Kruskal-Wallis in R sheet for more details.
kruskal.test(weightlost ~ Diet)