# pause after each complete plot (after several plots if mfrow is used)
par(ask=T)
#--------------------------
pause <- function(msg="") {
	cat("Return...[",msg,"]","\n")
	readline()
}
#--------------------------


# read file, space-delimited
df = read.table("happiness1000.RData");

# check that data is read in properly
head(df)

# check the header labels
colnames(df)

# compute linear model
m = lm(df$happiness ~ df$income)
# why not df$income ~ df$happiness? 

# plot the data
plot(df$income, df$happiness, cex=.5)
abline(m, col='red', lwd=3)  # overlay regression line

# correlation coefficient
r = cor(df$income, df$happiness)
cat("correlation coefficient: ", r, "\n")

# what is slope and intercept? 
coefs = m$coefficients
print(coefs)
pause("after coefs")
intercept = coefs[1]
slope = coefs[2]
cat("slope= ", slope, "\n")
cat("intercept= ", intercept, "\n")
pause("after slope/intercept")

# alternative approach
# the model provides the slope and intercept
# however, it is not possible to store the slope/intercept into 
# a variable without copying it manually ourselves
print("model: ")
print(m)

# regression line (best line fit)
#   y = intercept + slope * x

# sum of residual squares
# residual = (model fit) - (measured data)
# method 1
pause("residuals: method 1")
y = df$happiness
x = df$income
model = intercept + slope * x
residuals = (model-y)^2
sum.res = sum(residuals)
cat("sum of residuals: ", sum.res, "\n")

pause("residuals: method 2")
# method 2
sum.res = sum(m$residuals^2)
print(sum.res)
cat("sum(m$residuals^2)= ", sum.res, "\n")