#    MANN WHITNEY in R SCRIPT FILE.
#    www_statstutor_ac_uk Community Project.
#    Sofia Maria Karadimitriou and Ellen Marshall, Sheffield University.
#    Reviewed by Basile Marquier, University of Sheffield.
#    Dataset: Ulcer csv.
#    Resource: MANN WHITNEY in R.


#Open the leg ulcer comparison dataset which is saved as a csv file and call it ulcerR.  
#If your file is saved as a standard Excel file, save it as a csv file first.  
#You will need to change the command depending on where you have saved the file.

#Download the data set in .csv format and put it in a directory on your computer
# Read the leg ulcer data into the data frame ulcerR. The first row of the
# file is used for the column names and the fields are comma-separated.
# Edit the path below so that it points to where the csv file is saved.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
ulcerR <- read.csv("D:\\stcp-Rdataset-LegUlcer.csv", header = TRUE, sep = ",")
# Tell R we are using the leg ulcer dataset until further notice using attach.
# This means that a column such as 'GROUP' can be used instead of ulcerR$GROUP.
attach(ulcerR)


# R assumes all numeric values are continuous, so tell it that 'GROUP' is a
# factor and attach labels to the categories (for example, 1 in GROUP means the
# individual was at clinic).
# The factor command uses
#   variable <- factor(variable, c(category numbers), labels = c(category names)).
# The result is stored as a new object called GROUP in the workspace, which is
# found before the GROUP column of the already-attached ulcerR, so the data
# frame does not need to be attached a second time.
GROUP <- factor(GROUP, levels = c(1, 2), labels = c("Clinic", "Home"))




# Calculate the median and interquartile range (IQR) of UFW for each group.
# na.rm = TRUE drops missing values before each statistic is computed.
medians <- tapply(UFW, GROUP, median, na.rm = TRUE)
iqrs <- tapply(UFW, GROUP, IQR, na.rm = TRUE)
# Combine in one table: one row per group, one column per statistic.
results1 <- cbind(medians, iqrs)
# Round all the summary statistics to 2 decimal places.
round(results1, 2)
# To calculate the difference between the medians (first group minus second).
# unname() stops the difference being printed under the first group's name.
round(unname(medians[1] - medians[2]), 2)
# Checking assumptions.
# The shape of the distributions of the dependent variable by group must be
# similar. This can be checked using histograms.
# Specify that two charts are needed next to each other (1 row, 2 columns).
# par() returns the previous settings, which are kept so they can be restored.
old_par <- par(mfrow = c(1, 2))
# Plot histogram for the UFW of individuals in the clinic group.
hist(
    UFW[GROUP == "Clinic"],
    main = "Histogram for Clinic",
    xlab = "Leg Ulcer free time"
)
# Plot histogram for the UFW of individuals at home.
hist(
    UFW[GROUP == "Home"],
    main = "Histogram for Home",
    xlab = "Leg Ulcer free time"
)
# Restore the previous layout so later plots are not drawn in two panels.
par(old_par)




# Mann Whitney test.
# The wilcox.test command is used; given an outcome and a two-level grouping
# variable it conducts the Mann Whitney U test by default.
wilcox.test(UFW ~ GROUP)
# A significant result indicates a difference between the groups, read as a
# difference between medians when the two distributions have a similar shape.
# Finish by reporting what that difference is.


# Mood's median test in R.
# Constructing the method:
# calculate a pooled median from all the data, then use Fisher's exact test
# to compare each group's proportion of values below the pooled median.
#
# Arguments:
#   x  values of the outcome for the first group.
#   y  values of the outcome for the second group.
# Returns:
#   the p-value of fisher.test() applied to the cross-tabulation of
#   "value is below the pooled median" against group membership.
median.test <- function(x, y) {
    # Drop missing values first; otherwise the pooled median would be NA and
    # no observation could be classified as below or not below it.
    x <- x[!is.na(x)]
    y <- y[!is.na(y)]
    # Pool the two samples and record which group each value came from.
    z <- c(x, y)
    g <- rep(1:2, c(length(x), length(y)))
    m <- median(z)
    fisher.test(z < m, g)$p.value
}

# Example: apply median.test to the UFW values of the Clinic group and the
# UFW values of the Home group.
median.test(
    UFW[GROUP == "Clinic"],
    UFW[GROUP == "Home"]
)