# Associations between variables
# Dennis E. Slice (c) 2012
#
# Teaching script: loads a data table, extracts the rows whose `source` is
# "criminal", and explores the association between the two measurements held
# in columns 2 and 3 (height and middle finger length, per the original
# notes) using a scatterplot, cor(), cor.test(), cov(), and a by-hand
# computation of Pearson's r from standardized variables.

# Clean house
# NOTE(review): this wipes every object in the caller's global workspace;
# kept because the original script does it deliberately.
rm(list = ls())

# Pause between plots
par(ask = TRUE)

# Load data
X <- read.table("16_criminal_cambridge.RData")
head(X)
dim(X)

# Extract the criminals
X.criminal <- subset(X, source == "criminal")
head(X.criminal)
dim(X.criminal)

# Extract height and middle finger length (columns 2 and 3)
Y <- X.criminal[, 2:3]
head(Y)

# Put into separate variables for easier manipulation
h <- Y[, 1]
l <- Y[, 2]

# Plot l v. h
plot(h, l)

# Compute r (the correlation is symmetric in its arguments)
print(cor(l, h))
print(cor(h, l))

# Test r
print(cor.test(l, h))

# Halt here when the script is sourced; the remainder is meant to be stepped
# through by hand. Generates benign error msg.
stop()

# Note, ?cov()
cov(l, h)

# Recall formula for cov, cor: r = cov(l, h) / (sd(l) * sd(h))
cov(l, h) / sqrt(var(l) * var(h))
cor(l, h)

# Standardize h, l
# Subtract mean
h.dev <- h - mean(h)
l.dev <- l - mean(l)

# Divide by standard deviation
h.prime <- h.dev / sd(h)
l.prime <- l.dev / sd(l)

# Plot
plot(h.prime, l.prime)

# Compute cor from standardized variables
# Sum of products
scp <- sum(h.prime * l.prime)

# Divide by n - 1. The parentheses matter: length(h.prime - 1) is the length
# of the vector (h.prime - 1), i.e. n, not n - 1.
r.direct <- scp / (length(h.prime) - 1)
print(r.direct)

# The by-hand value should agree with cor() up to floating-point tolerance
print(all.equal(r.direct, cor(h, l)))

# Compare: standardizing does not change the correlation or its test
cor.test(h, l)
cor.test(h.prime, l.prime)