# use source("mymaze.r", echo=TRUE) to run this script from R

# Pause and wait for the user before each new plot is drawn.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
par(ask = TRUE)

# Read the maze data set (CSV in the current working directory)
X <- read.csv(file = "Maze_UniversityOfIllinois.csv")

# Quick inspection of what was read: first rows, class, column names,
# structure and per-column summary
head(X)
class(X)
names(X)
str(X)
summary(X)

# Keep only the four columns analysed below. Selecting by name makes a missing
# or misspelled column an immediate error; data.frame(T1E = X$T1E, ...) would
# silently drop a column whose X$... lookup returned NULL.
df <- X[c("T1E", "T1T", "T15E", "T15T")]

# Split the subjects at the median of T1E: values at or above the median are
# labelled "high", the rest "low" (ties at the median all go to "high", so the
# two groups need not be the same size).
# na.rm = TRUE keeps a single missing T1E from turning the median into NA and
# silently labelling everyone "low"; a subject with a missing T1E gets an NA
# label instead of being counted in a group.
m <- median(df$T1E, na.rm = TRUE)

# Build the labels as a character vector and convert once; factor() orders the
# levels alphabetically, i.e. "high" then "low".
df$err1 <- factor(ifelse(df$T1E >= m, "high", "low"))

#-----------------------------
# First approach: split df into the two err1 groups by hand, then compare
# the group means to see whether they differ significantly

# subjects labelled "low" (first-trial errors below the median)
low <- df[df[["err1"]] == "low", , drop = FALSE]    # data.frame
# subjects labelled "high" (first-trial errors at or above the median)
high <- df[df[["err1"]] == "high", , drop = FALSE]  # data.frame

# check on 1st trial: two-sample t-tests (Welch, the t.test default) of the
# low group against the high group on T1E and T1T
# author's observed result: the two means are significantly different
# (alternative hypothesis)
t.test(low$T1E, high$T1E)
t.test(low$T1T, high$T1T)

# check on 15th trial: same comparison on T15E and T15T
# author's observed result: the null hypothesis cannot be rejected
# (p-value > 0.05)
t.test(low$T15E, high$T15E)
t.test(low$T15T, high$T15T)

# Second approach: use the formula interface, taking advantage of df$err1
# being a factor with exactly two levels, so no manual subsetting is needed
t.test(df$T1E ~ df$err1)
t.test(df$T1T ~ df$err1)

t.test(df$T15E ~ df$err1)
t.test(df$T15T ~ df$err1)  # time to complete the maze

# p-values match the first approach, and this form is much simpler to write.
# factor() orders levels alphabetically ("high" before "low"), so here the
# difference is taken as high - low: the t statistic and confidence interval
# have the opposite sign to the first approach, which passed low first.
#-----------------------------
# Plotting the data
# Create box plots of each measure split by err1 group: a formula with a
# factor on the right-hand side makes plot() draw one box per factor level.
# The axis labels are taken from the formula terms as written.

plot(df$T1E ~ df$err1)
plot(df$T1T ~ df$err1)

plot(df$T15E ~ df$err1)
plot(df$T15T ~ df$err1)  # time to complete the maze
#----------------------------------
# Plot T1T, T5T and T15T on the same axes (one point per subject, in row
# order) to illustrate the times to complete the maze decreasing

# enlarge text and symbols for this and any later plots
par(cex = 1.5)

# The y-axis limits are computed over all three series. plot() alone would size
# the axis from T1T only, and any T5T/T15T point outside that range would be
# silently clipped. finite = TRUE ignores NA/Inf values, as plot() itself does.
plot(X$T1T, col = "black",
     ylim = range(X$T1T, X$T5T, X$T15T, finite = TRUE),
     sub = "1st black, 5th green, 15th red",
     xlab = "subject", ylab = "time")
# 5th trial: filled green squares
points(X$T5T, col = "green", pch = 22, bg = "green")
# 15th trial: red open circles (default symbol)
points(X$T15T, col = "red")