#! /usr/bin/env python3 # def exercise1 ( ): #*****************************************************************************80 # ## exercise1 seeks the best linear model for two temperature items. # # Modified: # # 27 January 2022 # from sklearn.linear_model import LinearRegression import matplotlib.pyplot as plt import numpy as np print ( "exercise1:" ) print ( " Find best linear model for two items of temperature data." ) # # Get the data. # filename = 'two_temperatures_data.txt' data = np.loadtxt ( filename ) n, d = np.shape ( data ) print ( " Data from " + filename + " involves n =", n, "items with dimension d =", d ) # # Analyze the data # print ( " Data statistics:" ) print ( " Min = ", np.min ( data, axis = 0 ) ) print ( " Max = ", np.max ( data, axis = 0 ) ) print ( " Range = ", np.max ( data, axis = 0 ) - np.min ( data, axis = 0 ) ) print ( " Mean = ", np.mean ( data, axis = 0 ) ) print ( " Variance = ", np.var ( data, axis = 0 ) ) # # Display F versus C. # plt.clf ( ) plt.plot ( data[:,0], data[:,1], 'bo', markersize = 8 ) plt.grid ( True ) plt.xlabel ( '<--- Fahrenheit --->' ) plt.ylabel ( '<--- Celsius --->' ) plt.title ( 'Two temperatures, Fahrenheit/Celsius', fontsize = 16 ) filename = 'exercise1_two_temperatures_data.png' plt.savefig ( filename ) print ( ' Graphics saved as "%s"' % ( filename ) ) plt.show ( ) plt.clf ( ) # # Set up the linear system X * c = y # X = np.c_ [ np.ones ( n ), data[:,0] ] y = data[:,1] # # 1) Set up and solve the normal equations. # XTX = np.dot ( X.T, X ) XTb = np.dot ( X.T, y ) c1 = np.linalg.solve ( XTX, XTb ) r1 = np.dot ( X, c1 ) - y mse1 = np.sum ( r1**2 ) / n # # 2) Use QR factorization linalg.qr(). # Q, R = np.linalg.qr ( X ) QTb = np.dot ( Q.T, y ) c2 = np.linalg.solve ( R, QTb ) r2 = np.dot ( X, c2 ) - y mse2 = np.sum ( r2**2 ) / n # # 3) Use SVD pseudoinverse pinv(). # P = np.linalg.pinv ( X ) c3 = np.dot ( P, y ) r3 = np.dot ( X, c3 ) - y mse3 = np.sum ( r3**2 ) / n # # 4) Use numpy linalg.lstsq() # c4, _, _, _ = np.linalg.lstsq ( X, y, rcond = None ) r4 = np.dot ( X, c4 ) - y mse4 = np.sum ( r4**2 ) / n # # 5) Use scikit-learn LinearRegression() # lin_reg = LinearRegression() lin_reg.fit ( X, y ) c5 = np.r_ [ lin_reg.intercept_, lin_reg.coef_[1:] ] r5 = np.dot ( X, c5 ) - y mse5 = np.sum ( r5**2 ) / n # # Tabulate results. # print ( '' ) print ( ' Method ----------------c------------- mse(r)' ) print ( '' ); print ( ' Normal ', c1, ' ', mse1 ) print ( ' QR ', c2, ' ', mse2 ) print ( ' PINV ', c3, ' ', mse3 ) print ( ' LSTSQ ', c4, ' ', mse4 ); print ( ' sklearn ', c5, ' ', mse5 ) # # Plot linear relationship y = c[0] * 1 + c[1] * x[0] # fahr_min = np.min ( data[:,0] ) fahr_max = np.max ( data[:,0] ) fahr = np.linspace ( fahr_min, fahr_max, 51 ) celsius = ( c1[0] + c1[1] * fahr ) plt.clf ( ) plt.plot ( data[:,0], data[:,1], 'bo', markersize = 8 ) plt.plot ( fahr, celsius, 'r-', markersize = 8, linewidth = 2 ) plt.grid ( True ) plt.xlabel ( '<--- Fahrenheit --->' ) plt.ylabel ( '<--- Celsius --->' ) plt.title ( 'Two Temperature Data and Model', fontsize = 16 ) filename = 'exercise1_two_temperatures_fitted.png' plt.savefig ( filename ) print ( ' Graphics saved as "%s"' % ( filename ) ) plt.show ( ) plt.clf ( ) # # Rescale c and coefficient of y(which was -1) by 160/c[0]. # print ( '' ) print ( ' Rescaled coefficient vectors C[0], C[1], C[2]' ); for c in [ c1, c2, c3, c4, c5 ]: s = 160 / c[0] cy = -1.0 * s c = c * s print ( c, cy ) return if ( __name__ == "__main__" ): exercise1 ( )