# pause after each complete plot (after several plots if mfrow is used) par(ask=T) #-------------------------- pause <- function(msg="") { cat("Return...[",msg,"]","\n") readline() } #-------------------------- # read file, space-delimited df = read.table("happiness1000.RData"); # check that data is read in properly head(df) # check the header labels colnames(df) # compute linear model m = lm(df$happiness ~ df$income) # why not df$income ~ df$happiness? # plot the data plot(df$income, df$happiness, cex=.5) abline(m, col='red', lwd=3) # overlay regression line # correlation coefficient r = cor(df$income, df$happiness) cat("correlation coefficient: ", r, "\n") # what is slope and intercept? coefs = m$coefficients print(coefs) pause("after coefs") intercept = coefs[1] slope = coefs[2] cat("slope= ", slope, "\n") cat("intercept= ", intercept, "\n") pause("after slope/intercept") # alternative approach # the model provides the slope and intercept # however, it is not possible to store the slope/intercept into # a variable without copying it manually ourselves print("model: ") print(m) # regression line (best line fit) # y = intercept + slope * x # sum of residual squares # residual = (model fit) - (measured data) # method 1 pause("residuals: method 1") y = df$happiness x = df$income model = intercept + slope * x residuals = (model-y)^2 sum.res = sum(residuals) cat("sum of residuals: ", sum.res, "\n") pause("residuals: method 2") # method 2 sum.res = sum(m$residuals^2) print(sum.res) cat("sum(m$residuals^2)= ", sum.res, "\n")