#! /usr/bin/env python3
#
def _predict_bill(c, age, sex, bmi, kids, smoker):
    """Evaluate the linear model for a single person.

    Computes c[0] + c[1]*age + c[2]*sex + c[3]*bmi + c[4]*kids + c[5]*smoker.

    Parameters:
        c: indexable sequence of the six model coefficients.
        age, sex, bmi, kids, smoker: predictor values for one person.

    Returns:
        The bill predicted by the model.
    """
    return (c[0] + c[1] * age + c[2] * sex + c[3] * bmi
            + c[4] * kids + c[5] * smoker)


def _report_prediction(c, age, sex, bmi, kids, smoker, actual):
    """Print the model's predicted bill next to the known actual bill.

    Parameters:
        c: indexable sequence of the six model coefficients.
        age, sex, bmi, kids, smoker: predictor values for one person.
        actual: the bill that was really charged, printed for comparison.
    """
    predicted = _predict_bill(c, age, sex, bmi, kids, smoker)
    print("")
    print(" For age = ", age, "sex = ", sex, "bmi = ", bmi,
          "kids = ", kids, "smoker = ", smoker)
    print(" the model predicts a bill of ", predicted,
          "The actual bill was ", actual)


def exercise3():
    """Seek the best linear model for the insurance data.

    Reads 'insurance_data.txt' with numpy.loadtxt, prints per-column
    statistics, saves and shows a histogram of column 6 (the bill), then
    solves the least squares problem X * c = y for the model

        bill = c0 + c1 * age + c2 * sex + c3 * bmi + c4 * kids + c5 * smoker

    where X is a column of ones followed by data columns 0 through 4 and
    y is data column 6.  Data column 5 is not used by the model.  The
    coefficients, the mean squared error, and two sample predictions are
    printed.

    Side effects:
        Writes 'exercise3_histogram.png' and opens a matplotlib window.

    Modified:
        27 January 2022
    """
    # Imports stay inside the function so that importing this module does
    # not pull in matplotlib.
    import matplotlib.pyplot as plt
    import numpy as np

    print("exercise3:")
    print(" Find best linear model for medical billing data.")
    print(" bill = c0 + c1 * age + c2 * sex + c3 * bmi + c4 * kids + c5 * smoker")
    #
    # Get the data.
    #
    data_filename = 'insurance_data.txt'
    data = np.loadtxt(data_filename)
    n, d = np.shape(data)
    print(" Data from " + data_filename + " involves n =", n,
          "items with dimension d =", d)
    #
    # Analyze the data, column by column.
    #
    column_min = np.min(data, axis=0)
    column_max = np.max(data, axis=0)
    print(" Data statistics:")
    print(" Min = ", column_min)
    print(" Max = ", column_max)
    print(" Range = ", column_max - column_min)
    print(" Mean = ", np.mean(data, axis=0))
    print(" Variance = ", np.var(data, axis=0))
    #
    # Display a histogram of the bill data, in data[:,6].
    #
    bill = data[:, 6]
    plot_filename = 'exercise3_histogram.png'
    plt.clf()
    plt.hist(bill, bins=21)
    plt.grid(True)
    plt.title('Medical bill histogram')
    plt.savefig(plot_filename)
    print(' Graphics saved as "%s"' % (plot_filename))
    plt.show()
    plt.clf()
    #
    # Set up the linear system X * c = y.
    # The leading column of ones carries the constant term c0.
    #
    X = np.c_[np.ones(n), data[:, 0:5]]
    y = bill
    #
    # Find c using numpy linalg.lstsq(); only the solution vector is needed.
    #
    c, _, _, _ = np.linalg.lstsq(X, y, rcond=None)
    print(" C = ", c)

    residual = np.dot(X, c) - y
    mse = np.sum(residual**2) / n
    print(" MSE = ", mse)
    #
    # Make two predictions and compare each with the known bill.
    #
    _report_prediction(c, age=19, sex=1, bmi=24.6, kids=1, smoker=0,
                       actual=1837.23)
    _report_prediction(c, age=34, sex=0, bmi=31.92, kids=1, smoker=1,
                       actual=37701)


if __name__ == "__main__":
    exercise3()