#    PAIRED T TEST in R SCRIPT FILE.
#    www_statstutor_ac_uk Community Project.
#    Sofia Maria Karadimitriou Sheffield University.
#    Reviewed by Basile Marquier, University of Sheffield.
#    Dataset: Cholesterol.csv 
#    Resource: PAIRED T TEST in R.

# Open the cholesterol dataset, which is saved as a csv file, and call it cholA.
# If your file is saved as a standard Excel file, save it as a csv file first.
# You will need to change the path below depending on where you saved the file
# and what you called it; here the file 'stcp-Rdataset-cholesterol.csv' is in
# the D drive.
# header = TRUE means that the first row contains the variable names.
# TRUE is written in full because the shorthand T is an ordinary variable that
# can be reassigned, whereas TRUE is a reserved word.
cholA <- read.csv("D:\\stcp-Rdataset-cholesterol.csv", header = TRUE)

# Look at the data.
cholA
# attach() places the columns of cholA on the search path, so that 'Before'
# can be used instead of cholA$Before in the commands that follow.
# Run detach(cholA) when you have finished to remove them from the search path.
attach(cholA)

# Descriptive statistics for cholesterol before the diet and after 4 weeks.
# The columns are taken from cholA explicitly (cholA$Before), so these lines
# do not depend on the dataset having been attached.

# Means of Before and After4weeks, stacked as one row each.
mu <- rbind(mean(cholA$Before), mean(cholA$After4weeks))
# Standard deviations of Before and After4weeks, stacked as one row each.
sds <- rbind(sd(cholA$Before), sd(cholA$After4weeks))
# Combine into one table and label the rows and columns.
results1 <- cbind(mu, sds)
colnames(results1) <- c('Mean', 'SD')
rownames(results1) <- c('Before', 'After4weeks')
# Round to 2 decimal places and display the results.
round(results1, 2)

# Differences between the cholesterol levels at the two time points,
# computed row by row as After4weeks minus Before (negative = a reduction).
dif <- cholA$After4weeks - cholA$Before
# Mean and standard deviation of the change in cholesterol.
mean(dif)
sd(dif)

# Checking assumptions.
# The differences need to be normally distributed.
# This can be checked using histograms, QQ plots or tests
# (see the Checking normality in R sheet).

# Ask for two charts side by side. par() returns the previous settings, which
# are kept in old_par so the layout can be put back once both charts are drawn.
old_par <- par(mfrow = c(1, 2))
# Histogram of the differences in cholesterol levels.
hist(dif, main = 'Histogram for Difference in Cholesterol levels', xlab = 'Differences')
# Normal QQ plot of the differences in cholesterol levels.
qqnorm(dif)
# Add a reference line through the first and third quartiles.
# For normally distributed data the points lie on or close to this line.
qqline(dif, col = 'red')
# Restore the previous plot layout so later plots are not split into two panels.
par(old_par)

# Conduct the Shapiro-Wilk test of normality on the differences.
shapiro.test(dif)

# If the differences are normally distributed, the paired t-test is valid.
# Carry out the paired t-test.
# with(cholA, ...) evaluates the call using the columns of cholA, so the test
# does not depend on the dataset having been attached.
# paired = TRUE is written in full because the shorthand T can be reassigned.

with(cholA, t.test(After4weeks, Before, paired = TRUE))
# If the t-test is significant, there is evidence of a difference in mean
# cholesterol between the two time points.
# Finish by reporting what that difference is.

# If the differences are very skewed, use the Wilcoxon signed-rank test, which
# is what wilcox.test() carries out when paired = TRUE. (The name
# Wilcoxon-Mann-Whitney refers to the test for two independent samples.)
# exact = FALSE asks for a p-value that is not computed exactly.
# See the Wilcoxon resource for more details.
with(cholA, wilcox.test(After4weeks, Before, paired = TRUE, exact = FALSE))