# INDEPENDENT T TEST in R SCRIPT FILE.
# www_statstutor_ac_uk Community Project.
# Sofia Maria Karadimitriou, Sheffield University.
# Reviewed by Basile Marquier, University of Sheffield.
# Dataset: birthweight_reduced csv.
# Resource: INDEPENDENT T TEST in R.
# Open the reduced birthweight dataset, which is saved as a csv file, and call
# it birthweightR. If your file is a standard Excel file, save it as csv first.
# Download the data set in .csv format and either
#   * put it in R's working directory ("File" -> "Change dir." in the R GUI), or
#   * leave it on the memory stick (drive E: in this example).
# Only one of the two locations has to exist. The memory stick copy is used
# when both are present.
csv_name <- "stcp-Rdataset-birthweight_reduced.csv"
csv_candidates <- c(paste0("E:\\", csv_name), csv_name)
csv_found <- csv_candidates[file.exists(csv_candidates)]
if (length(csv_found) == 0) {
  stop(
    "Cannot find ", csv_name, " in the working directory (", getwd(),
    ") or on drive E:",
    call. = FALSE
  )
}
# header = TRUE: the first row of the file holds the variable names.
birthweightR <- read.csv(csv_found[1], header = TRUE, sep = ",")
# Tell R we are using the birthweight dataset until further notice using attach.
# This means that 'Gestation' can be used instead of birthweightR$Gestation.
# Attach exactly once: every extra attach() puts another copy of the data on
# the search path and produces "object is masked" messages. Copies left over
# from an earlier run of this script are detached first.
while ("birthweightR" %in% search()) {
  detach("birthweightR", character.only = TRUE)
}
attach(birthweightR)
# R assumes all numeric values are continuous, so tell it that 'smoker' is a
# factor and attach labels to the categories (for example 0 in smoker means
# the mother is a non-smoker).
# The factor command uses
#   variable <- factor(variable, levels = c(category numbers),
#                      labels = c(category names)).
# The new 'smoker' lives in the workspace and therefore takes precedence over
# the numeric 'smoker' column of the attached data.
smoker <- factor(
  birthweightR$smoker,
  levels = c(0, 1),
  labels = c("Non-smoker", "Smoker")
)
# Calculate the mean and standard deviation of birthweight for each smoking
# group (non-smokers and smokers).
# na.rm = TRUE drops missing values before each statistic is computed.
# The results are stored under their own names so that R's mean() and sd()
# functions are not hidden by objects with the same name.
group_means <- tapply(Birthweight, smoker, mean, na.rm = TRUE)
group_sds <- tapply(Birthweight, smoker, sd, na.rm = TRUE)
# Combine in one table: one row per group, columns named 'mean' and 'sd'.
results1 <- cbind(mean = group_means, sd = group_sds)
# Round and display all the summary statistics to 2 decimal places.
round(results1, 2)
# Difference between the means (non-smokers minus smokers), to 2 decimal places.
round(group_means[1] - group_means[2], 2)
# Checking assumptions.
# The dependent variable needs to be normally distributed within each group.
# This can be checked using histograms, QQ plots or tests (see the
# 'Checking normality in R' sheet).
# Specify that two charts are needed next to each other, remembering the
# previous graphics settings so they can be put back afterwards.
old_par <- par(mfrow = c(1, 2))
# Plot histogram for the birthweight of babies of non-smoker mothers.
hist(
  Birthweight[smoker == "Non-smoker"],
  main = "Histogram for non smokers",
  xlab = "Birthweight"
)
# Plot histogram for the birthweight of babies of smoker mothers.
hist(
  Birthweight[smoker == "Smoker"],
  main = "Histogram for smokers",
  xlab = "Birthweight"
)
# Restore the previous layout so later plots are not squeezed into two panels.
par(old_par)
# Checking the assumption of equality of variances using Levene's test.
# The test is in the car package, which must be installed before it is loaded.
library(car)
# Once loaded, carry out Levene's test. center = "mean" requests the original
# (mean-centred) Levene's test; the function is given by name so that it is
# still found if an object called 'mean' exists in the workspace.
leveneTest(Birthweight ~ smoker, center = "mean")
# Note: some commands may not work in certain RStudio set-ups.
# An alternative is available in the lawstat package.
# Install it first (Tools -> Install Packages), then load it.
library(lawstat)
# levene.test() centres on the median by default, so ask for the mean
# explicitly to get the same test as the car version above.
levene.test(Birthweight, smoker, location = "mean")
# Independent samples t-test of birthweight between the two smoking groups.
# Set var.equal = TRUE when equal variances can be assumed (see Levene's test)
# and var.equal = FALSE when they cannot.
t.test(
  Birthweight ~ smoker,
  var.equal = TRUE
)
# A significant t-test indicates a difference between the group means.
# Finish by reporting what that difference is.