# INDEPENDENT T TEST in R SCRIPT FILE.
# www_statstutor_ac_uk Community Project.
# Sofia Maria Karadimitriou, Sheffield University.
# Reviewed by Basile Marquier, University of Sheffield.
# Dataset: birthweight_reduced csv.
# Resource: INDEPENDENT T TEST in R.

# Packages ----
# car provides leveneTest(). Install it first if necessary
# (in RStudio: Tools -> Install Packages).
library(car)

# Load the data ----
# Open the birthweight reduced dataset, which is saved as a csv file, and call
# it birthweightR. If your file is saved as a standard Excel file, save it as
# a csv file first.
# Download the data set in .csv format, put it in a directory on your computer
# and make that directory the working directory ("File" -> "Change dir.").
birthweightR <- read.csv(
  "stcp-Rdataset-birthweight_reduced.csv",
  header = TRUE,
  sep = ","
)
# Alternative: read the file straight from a memory stick mounted as E:.
# Only one of the two read.csv() calls should be active, otherwise the second
# silently overwrites the first (and fails when the drive does not exist).
# birthweightR <- read.csv(
#   "E:\\stcp-Rdataset-birthweight_reduced.csv",
#   header = TRUE,
#   sep = ","
# )

# Recode the grouping variable ----
# R assumes all numeric values are continuous, so tell it that 'smoker' is a
# factor and attach labels to the categories (0 in smoker means the mother is
# a non-smoker, 1 means she is a smoker).
# The factor command uses
#   variable <- factor(variable, c(category numbers), labels = c(category names)).
# The factor is stored back in the data frame, so no attach() is needed and
# every later command refers to the data frame explicitly.
birthweightR$smoker <- factor(
  birthweightR$smoker,
  levels = c(0, 1),
  labels = c("Non-smoker", "Smoker")
)

# Summary statistics ----
# Calculate means and standard deviations of birthweight for each smoking
# group. na.rm = TRUE drops missing values before each statistic is computed.
# The results are not called 'mean' and 'sd', so the base functions of the
# same name are not shadowed.
group_mean <- tapply(
  birthweightR$Birthweight, birthweightR$smoker, mean,
  na.rm = TRUE
)
group_sd <- tapply(
  birthweightR$Birthweight, birthweightR$smoker, sd,
  na.rm = TRUE
)

# Combine in one table; the columns are named 'mean' and 'sd'.
results1 <- cbind(mean = group_mean, sd = group_sd)

# Round and display all the summary statistics to 2 decimal places.
print(round(results1, 2))

# Difference between the means (non-smokers minus smokers).
print(round(group_mean[["Non-smoker"]] - group_mean[["Smoker"]], 2))

# Checking assumptions ----
# The dependent variable needs to be normally distributed within each group.
# This can be checked using histograms, QQ plots or tests
# (see the Checking normality in R sheet).

# Specify that two charts are needed next to each other, remembering the
# previous graphics settings so they can be restored afterwards.
old_par <- par(mfrow = c(1, 2))

birthweight_by_group <- split(birthweightR$Birthweight, birthweightR$smoker)

# Histogram for the birthweight of babies of non-smoker mothers.
hist(
  birthweight_by_group[["Non-smoker"]],
  main = "Histogram for non smokers",
  xlab = "Birthweight"
)
# Histogram for the birthweight of babies of smoker mothers.
hist(
  birthweight_by_group[["Smoker"]],
  main = "Histogram for smokers",
  xlab = "Birthweight"
)

# Restore the graphics settings that were in place before the histograms.
par(old_par)

# Equality of variances: Levene's test ----
# center = "mean" gives the original Levene's test (car's default is the
# median, which is the more robust Brown-Forsythe variant).
print(leveneTest(Birthweight ~ smoker, data = birthweightR, center = "mean"))

# Note: if leveneTest() from car does not work in your RStudio setup, an
# alternative is available in the lawstat package (Tools -> Install Packages).
# location = "mean" is set explicitly because levene.test() defaults to the
# median, which would not be the same test as the one above.
if (requireNamespace("lawstat", quietly = TRUE)) {
  print(
    lawstat::levene.test(
      birthweightR$Birthweight,
      birthweightR$smoker,
      location = "mean"
    )
  )
} else {
  message("Package 'lawstat' is not installed; skipping the alternative test.")
}

# Carry out the t-test ----
# Use var.equal = TRUE if equal variances can be assumed and
# var.equal = FALSE if not.
print(t.test(Birthweight ~ smoker, data = birthweightR, var.equal = TRUE))

# If the t-test is significant, there is evidence of a difference between the
# group means. Finish by reporting what that difference is.