# Maze learning analysis.
#
# Run this script from R with: source("mymaze.r", echo = TRUE)
#
# Reads "Maze_UniversityOfIllinois.csv", splits the subjects into a "low" and
# a "high" group according to the number of errors in the first trial (T1E),
# compares the two groups with Welch t-tests on the first and fifteenth
# trials, and plots the results.

# Pause before each plot. par() returns the previous settings, which are kept
# so the graphics state can be restored at the end of the script.
old_par <- par(ask = TRUE)

# Load the data ----
X <- read.csv("Maze_UniversityOfIllinois.csv")
head(X)
class(X)
colnames(X)
str(X)
summary(X)

# Only keep the columns needed, to make working easier. Selecting by name
# fails loudly if a column is missing from the CSV.
df <- X[c("T1E", "T1T", "T15E", "T15T")]

# Label each subject by its first-trial error count: "high" when it is at or
# above the median, "low" otherwise. na.rm = TRUE keeps the split meaningful
# if T1E contains missing values (such rows get an NA label and are left out
# of the comparisons below).
m <- median(df$T1E, na.rm = TRUE)
df$err1 <- factor(ifelse(df$T1E >= m, "high", "low"))

#-----------------------------
# First approach: test whether the means of the "low" and "high" subsets are
# significantly different.

# group with first-trial errors below the median
# which() drops rows whose label is NA instead of producing all-NA rows
low <- df[which(df$err1 == "low"), ]   # data.frame

# group with first-trial errors at or above the median
high <- df[which(df$err1 == "high"), ] # data.frame

# check on 1st trial
# author's note: the two means are significantly different
# (alternative hypothesis)
t.test(low$T1E, high$T1E)
t.test(low$T1T, high$T1T)

# check on 15th trial
# author's note: the null hypothesis cannot be rejected (p-value > 0.05)
t.test(low$T15E, high$T15E)
t.test(low$T15T, high$T15T)

# Second approach: use the formula interface, taking into account that
# df$err1 is a factor with exactly two levels.
t.test(T1E ~ err1, data = df)
t.test(T1T ~ err1, data = df)
t.test(T15E ~ err1, data = df)
t.test(T15T ~ err1, data = df) # time to complete the maze
# The results match the first approach (the order of the groups, and hence
# the sign of the statistic, may differ), but are much simpler to obtain.

#-----------------------------
# Plotting the data ----

# Create box plots (plotting a numeric response against a factor)
plot(T1E ~ err1, data = df)
plot(T1T ~ err1, data = df)
plot(T15E ~ err1, data = df)
plot(T15T ~ err1, data = df) # time to complete the maze

#----------------------------------
# Plot T1T, T5T, T15T on the same plot to illustrate the times to complete
# the maze decreasing.
old_cex <- par(cex = 1.5)

# The y-axis must span all three series; otherwise it is sized for T1T alone
# and points of the later trials outside that range are not drawn.
time_range <- range(c(X$T1T, X$T5T, X$T15T), na.rm = TRUE)

plot(X$T1T, col = "black", ylim = time_range,
     sub = "1st black, 5th green, 15th red",
     xlab = "subject", ylab = "time")
points(X$T5T, col = "green", pch = 22, bg = "green")
points(X$T15T, col = "red")

# Restore the graphics settings changed by this script.
par(old_cex)
par(old_par)