# Paired comparisons
# Dennis E. Slice (c) 2012
# Analysis of Sokal and Rohlf girl face widths
# In-class lecture prep

# Clear the air
rm(list=ls())

# Set up plotting
ask.orig = par()$ask
par(ask=TRUE)

# Load the data
X = read.table("22_paired_comparisons_SR_Box11.5.RData")

# Plot stacked histograms
mfrow.orig = par()$mfrow
par(mfrow=c(2,1))
# Use common range to compare histograms
xRange = c(6,9)

hist(X$five,xlim=xRange,col="red")
hist(X$six,xlim=xRange,col="red")

# boxplots
# Back to single plot in window
par(mfrow=c(1,1))
boxplot(X)

# Test ages for normality
print(shapiro.test(X$five))
print(shapiro.test(X$six))

# t test
print(t.test(X$five,X$six))

# difference
diff = X$six - X$five
hist(diff,col="papayawhip")
sample.t = t.test(diff)$statistic
print(sample.t)

# Randomization test
nSamples = 999
# Create vector to hold t-statistic for each random sample
results = rep(0,nSamples)
# Do the normal t-test
print(t.test(X$six-X$five))
count = 1 # One for the sample, itself.
for (i in 1:nSamples) {
  # Get the number of observations to randomize from the data
  # vector. Will work for other data sets.
	n = length(X$six)
  # Loop over each observation in the sample
	for (j in 1:n) {
    # If a random N(0,1) number is <0 swap across columns
    # That is, swap the five and six yo values for this girl.
    # If the random number is >0 skip it. This means there is
    # a 50:50 chance of being swapped for each observation.
		if (rnorm(1)<0) {
			hold = X$five[j]
			X$five[j] = X$six[j]
			X$six[j] = hold
		}		
	}
  # Compute the difference for the randomized data.
	diff = (X$six - X$five)
  # Test it against the default N(0,1) distribution and
  # save the t statistic.
	results[i] = t.test(diff)$statistic
	# See if it exceeds the magnitude of the sample.
	# Not (!(A<B)) means A is >= B
  # Increment counter for those random samples whose t-statistic
  # is greater or equal to the sample t in magnitude, i.e., use abs()
	if (!(abs(results[i])<abs(sample.t)) ) {
      count = count+1
		}
}
# Compute range to include sample and all randomized t
xRange = range(c(results,sample.t))
hist(c(results,sample.t),xlim=xRange,col="lightblue")
# Draw a line to show/emphasize sample t
lines(c(sample.t,sample.t),c(0,nSamples+1),col="red",lwd=2)
# Print the sample t and p-value based on number of 
# randomizations exceeding the magnitude of the t value in 
# the actual sample.
print(sample.t)
print(paste("p = ",(1.0*count)/(nSamples+1)))

# Put things back the way we found them.
par(mfrow=mfrow.orig)
par(ask=ask.orig)