# Linear regression demo: happiness vs. income.
# Fits a through-the-origin model, plots the data with the fitted line,
# and computes the residual sum of squares in two ways.

# Pause after each complete plot (after several plots if mfrow is used).
par(ask = TRUE)

#--------------------------
# Wait for the user to press Return, showing an optional message.
pause <- function(msg = "") {
  cat("Return...[", msg, "]", "\n")
  readline()
}

#--------------------------
# Read the data file (space-delimited text, per the original comment).
# NOTE(review): read.table() expects plain text; confirm the file is not a
# binary .RData image (which would need load() instead).
df <- read.table("happiness10.RData")

# Check that the data were read in properly.
head(df)
colnames(df)  # check the header labels

# Fit a linear model with no intercept (regression through the origin).
# Why happiness ~ income and not income ~ happiness? Income is the
# predictor here and happiness the response.
m <- lm(df$happiness ~ df$income + 0)

# Plot the data and overlay the regression line.
plot(df$income, df$happiness)
abline(m, col = "red")

# Correlation coefficient of the raw data.
r <- cor(df$income, df$happiness)
cat("correlation coefficient: ", r, "\n")

# What are the slope and intercept?
coefs <- m$coefficients
print(coefs)
pause("after coefs")

# BUG FIX: the model was fit with "+ 0", so it has NO intercept term and
# coefs contains a single element -- the slope. The original code took
# coefs[1] as the intercept and coefs[2] (NA) as the slope, which made the
# hand-computed fit, the method-1 residual sum, and the predictions all NA.
# For a through-the-origin fit the intercept is 0 by construction.
intercept <- 0
slope <- coefs[1]
cat("slope= ", slope, "\n")
cat("intercept= ", intercept, "\n")
pause("after slope/intercept")

# Alternative approach: printing the model shows slope/intercept, but the
# values must still be copied into variables manually (as above).
print("model: ")
print(m)

# Regression line (best line fit): y = intercept + slope * x
# Residual = (model fit) - (measured data);
# we compare the sum of squared residuals computed two ways.

# Method 1: compute the fitted values by hand from slope/intercept.
pause("residuals: method 1")
y <- df$happiness
x <- df$income
model <- intercept + slope * x
residuals <- (model - y)^2
sum.res <- sum(residuals)
cat("sum of residuals: ", sum.res, "\n")

pause("residuals: method 2")
# Method 2: use the residuals stored inside the fitted model object.
sum.res <- sum(m$residuals^2)
print(sum.res)
cat("sum(m$residuals^2)= ", sum.res, "\n")

#-----------------------------------
# Predict happiness for some new income values.
vals <- c(37, 48, 52, 56)
predicted.values <- intercept + slope * vals

m1 <- lm(df$happiness ~ df$income + 0)
# The correlation is based on the scattered data, not on the regression
# line; therefore it is the same whether or not the model contains "+ 0".
#-----------------------------------------------------------------------