# SCATTERPLOTS IN R SCRIPT FILE.
# www.statstutor.ac.uk Community Project.
# Sofia Maria Karadimitriou and Ellen Marshall, Sheffield University.
# Reviewed by Jim Bull, University of Swansea.
# Dataset: Birthweight reduced csv.
# Resource: Scatterplots in R.

# Loading the data ----

# Open the birthweight reduced dataset which is saved as a csv file and call
# it birthweightR.
# If your file is saved as a standard Excel file, save it as a csv file first.
# You will need to change the command depending on where you have saved the
# file.
birthweightR <- read.csv(
  "D:\\Birthweight reduced.csv",
  header = TRUE,
  sep = ","
)

# Tell R we are using the birthweight dataset until further notice using
# attach. This means that 'Gestation' can be used instead of
# birthweightR$Gestation.
# If this script is run more than once in the same session, any copy attached
# by an earlier run is detached first, so the search path never holds a stale
# or duplicated version of the data.
while ("birthweightR" %in% search()) {
  detach("birthweightR", character.only = TRUE)
}
attach(birthweightR)

# R assumes all numeric values are continuous so tell it that 'smoker' is a
# factor and attach labels to the categories (for example 0 in smoker means
# the mother is a non-smoker).
# The factor command uses
#   variable <- factor(variable, c(category numbers), labels = c(category names)).
# The factor is built from the column in the data frame rather than from the
# bare name 'smoker': after the first run 'smoker' is already a labelled
# factor in the workspace, and converting that again would turn every value
# into NA.
smoker <- factor(
  birthweightR[["smoker"]],
  levels = c(0, 1),
  labels = c("Non-smoker", "Smoker")
)

# Plotting the relationship between two continuous variables ----

plot(
  Gestation, Birthweight,
  main = "Scatterplot of gestational age and birthweight"
)

# main = '' gives the title.
# xlab = '' controls the x axis label.
# ylab = '' controls the y axis label.
# pch changes the shape of the points, pch = 4 gives crosses.
# cex changes the size of the points.
# lwd changes the width of the outline.
plot(
  Gestation, Birthweight,
  main = "Scatterplot of gestational age and birthweight",
  xlab = "Gestation (weeks)",
  ylab = "Birthweight (lbs)",
  pch = 4,
  cex = 1.5,
  lwd = 3
)

# Creating a plot with different colours by group ----

# Indexing a vector by the factor 'smoker' picks the first entry for
# non-smokers and the second entry for smokers.
plot(
  Gestation, Birthweight,
  col = c("red", "blue")[smoker],
  main = "Scatterplot by smoker",
  pch = c(1, 4)[smoker],
  xlab = "Gestation (weeks)",
  ylab = "Birthweight(lbs)"
)

# Adding a legend with the labels for each group.
legend(
  x = "topleft",
  legend = levels(smoker),
  col = c("red", "blue"),
  pch = c(1, 4)
)

# Producing a scatterplot matrix ----

pairs(
  ~ Birthweight + Gestation + motherage,
  main = "Birth weight scatterplot matrix",
  col = c("red", "blue")[smoker],
  pch = c(1, 4)[smoker]
)

# Adding a regression line to the plot ----

plot(
  Gestation, Birthweight,
  main = "Scatterplot of gestational age and birthweight",
  xlab = "Gestation (weeks)",
  ylab = "Birthweight(lbs)"
)
# lm() fits the straight line Birthweight = a + b * Gestation and abline()
# draws it on top of the existing plot.
abline(lm(Birthweight ~ Gestation), col = "red", lwd = 2)