#    WILCOXON in R SCRIPT FILE.
#    www_statstutor_ac_uk Community Project.
#    Basile Marquier, Sheffield University.
#    Reviewed by Dan Wilson, University of Sheffield.
#    Dataset: Video csv.
#    Resource: WILCOXON in R.

#Open the video dataset which is saved as a csv file and call it videoR.
#If your file is saved as a standard Excel file, save it as a csv file first.
#You will need to change the command depending on where you have saved the file.

#LOADING THE DATA

#Download the data set in .csv format and put it in a directory on your computer
#Open the video dataset from the place you saved it and call it videoR
#This example refers to the memory stick where the data is stored as stcp-Rdataset-video.
# Read the video dataset from the memory stick into the data frame videoR.
# The file is read as comma-separated with a header row of column names.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
videoR <- read.csv("E:\\stcp-Rdataset-video.csv", header = TRUE, sep = ",")

# Tell R we are using the video dataset until further notice using attach.
# The statements that follow refer to TotalDDEMO and TotalCOld by bare name,
# so they depend on this call.
attach(videoR)

###################   ASSUMPTIONS     ###########

#Assumption 1:
#Your dependent variable should be measured at the ordinal or continuous level.

#Assumption 2:
#Your independent variable should consist of two categorical, "related groups" or "matched pairs".

#Assumption 3:
#If the difference of the two related samples is not normally distributed, then we should use the Wilcoxon signed rank test.
#Otherwise, we use a paired t-test
#We create the difference of TotalDDEMO and TotalCOld in order to test the normality

# Paired differences between the two related measurements; the shape of their
# distribution decides between a paired t-test and the Wilcoxon signed rank test.
DiffCD <- TotalDDEMO - TotalCOld

# Use a single plotting panel.
par(mfrow = c(1, 1))

# Histogram of the differences, used as a visual check of normality.
hist(
  DiffCD,
  main = "Distribution of the differences between TotalDDEMO and TotalCOld ",
  xlab = "Value"
)

# => The histogram is skewed, so the differences between the two related samples are considered not normally distributed.
# We therefore use a non-parametric test: the Wilcoxon signed rank test.


#######################
#        ANALYSIS
#######################


#calculate medians

# Calculate the median of each condition. print() is used so the values are
# also shown when the script is run with source().
print(median(TotalDDEMO))
print(median(TotalCOld))

# To carry out a Wilcoxon signed rank test use
# wilcox.test(sample 1, sample 2, paired = TRUE), provided by the 'stats'
# package. exact = FALSE asks for the normal approximation instead of an
# exact p-value.
test <- wilcox.test(TotalDDEMO, TotalCOld, paired = TRUE, exact = FALSE)

# Display the test statistic and p-value that the conclusion below is based on.
print(test)


## => Strong evidence against the null that the conditions are equivalent

# Calculating effect sizes:
#In order to calculate the effect size, the standardised test statistic Z is needed.
#As this is not part of the output, it can be calculated from the p-value using the following code 

# Recover the standardised statistic Z from the p-value of the test:
# half the p-value is passed to the standard normal quantile function.
Zstat <- qnorm(test[["p.value"]] / 2)

# Effect size: |Z| divided by the square root of the sample size.
# NOTE(review): the sample size 20 is hard-coded; presumably it matches the
# video dataset -- confirm, and update it if the data change.
abs(Zstat) / sqrt(20)

## => Effect size 0.8737202 is big according to Cohen's classification