#! /usr/bin/env python3 # def exercise2 ( ): #*****************************************************************************80 # ## exercise2() seeks the best linear model for the Playfair data. # # Modified: # # 27 January 2022 # from sklearn.linear_model import LinearRegression import matplotlib.pyplot as plt import numpy as np print ( "exercise2():" ) print ( " Find best linear model for Playfair data." ) print ( " cost of wheat = c0 + c1 * year" ) # # Get the data. # filename = 'playfair_data.txt' data = np.loadtxt ( filename ) n, d = np.shape ( data ) print ( " Data from " + filename + " involves n =", n, "items with dimension d =", d ) # # Analyze the data # print ( " Data statistics:" ) print ( " Min = ", np.min ( data, axis = 0 ) ) print ( " Max = ", np.max ( data, axis = 0 ) ) print ( " Range = ", np.max ( data, axis = 0 ) - np.min ( data, axis = 0 ) ) print ( " Mean = ", np.mean ( data, axis = 0 ) ) print ( " Variance = ", np.var ( data, axis = 0 ) ) # # Construct variable COST = data[:,1] / data[:,2] # cost = data[:,1] / data[:,2] # # Display YEAR versus COST. # plt.clf ( ) plt.plot ( data[:,0], cost[:], 'bo', markersize = 8 ) plt.grid ( True ) plt.xlabel ( '<--- Year --->' ) plt.ylabel ( '<--- Cost --->' ) plt.title ( 'Cost for a measure of wheat, by year', fontsize = 16 ) filename = 'exercise2_data.png' plt.savefig ( filename ) print ( ' Graphics saved as "%s"' % ( filename ) ) plt.show ( ) plt.clf ( ) # # Set up the linear system X * c = y # X = np.c_ [ np.ones ( n ), data[:,0] ] y = cost[:] # # Find c using numpy linalg.lstsq() # c = np.linalg.lstsq ( X, y, rcond = None )[0] print ( " C = ", c ) r = np.dot ( X, c ) - y mse = np.sum ( r**2 ) / n print ( " MSE = ", mse ) # # Plot linear relationship y = c[0] * 1 + c[1] * x[0] # y_min = np.min ( data[:,0] ) y_max = np.max ( data[:,0] ) y_lin = np.linspace ( y_min, y_max, 51 ) c_lin = c[0] + c[1] * y_lin plt.clf ( ) plt.plot ( data[:,0], cost[:], 'bo', markersize = 8 ) plt.plot ( y_lin, c_lin, 'r-', markersize = 8, linewidth = 2 ) plt.grid ( True ) plt.xlabel ( '<--- Year --->' ) plt.ylabel ( '<--- Cost --->' ) plt.title ( 'Wheat Cost Data and Model', fontsize = 16 ) filename = 'exercise2_fitted.png' plt.savefig ( filename ) print ( ' Graphics saved as "%s"' % ( filename ) ) plt.show ( ) plt.clf ( ) # # Estimate price of wheat in 1850: # y1850 = 1850 c1850 = c[0] + c[1] * y1850 print ( "" ) print ( " The model predicts that in 1850, wheat will cost ", c1850 ) return def exercise2_normalized ( ): #*****************************************************************************80 # ## exercise2_normalized() seeks the best linear model for the Playfair data. # # Modified: # # 27 January 2022 # from sklearn.linear_model import LinearRegression import matplotlib.pyplot as plt import numpy as np print ( "exercise2_normalized():" ) print ( " Find best linear model for normalized Playfair data." ) print ( " cost of wheat = c0 + c1 * year" ) # # Get the data. # filename = 'playfair_data.txt' data = np.loadtxt ( filename ) n, d = np.shape ( data ) print ( " Data from " + filename + " involves n =", n, "items with dimension d =", d ) # # Analyze the data # print ( " Data statistics:" ) print ( " Min = ", np.min ( data, axis = 0 ) ) print ( " Max = ", np.max ( data, axis = 0 ) ) print ( " Range = ", np.max ( data, axis = 0 ) - np.min ( data, axis = 0 ) ) print ( " Mean = ", np.mean ( data, axis = 0 ) ) print ( " Variance = ", np.var ( data, axis = 0 ) ) # # Construct variable COST = data[:,1] / data[:,2] # cost = data[:,1] / data[:,2] costmin = np.min ( cost ) costmax = np.max ( cost ) cost = ( cost - costmin ) / ( costmax - costmin ) # # Normalize the data. # yearmin = np.min ( data[:,0] ) yearmax = np.max ( data[:,0] ) data[:,0] = ( data[:,0] - yearmin ) / ( yearmax - yearmin ) # # Display YEAR versus COST. # plt.clf ( ) plt.plot ( data[:,0], cost[:], 'bo', markersize = 8 ) plt.grid ( True ) plt.xlabel ( '<--- Year --->' ) plt.ylabel ( '<--- Cost --->' ) plt.title ( 'Cost for a measure of wheat, by year', fontsize = 16 ) filename = 'exercise2_normalized_data.png' plt.savefig ( filename ) print ( ' Graphics saved as "%s"' % ( filename ) ) plt.show ( ) plt.clf ( ) # # Set up the linear system X * c = y # X = np.c_ [ np.ones ( n ), data[:,0] ] y = cost[:] # # Find c using numpy linalg.lstsq() # c = np.linalg.lstsq ( X, y, rcond = None )[0] print ( " C = ", c ) r = np.dot ( X, c ) - y mse = np.sum ( r**2 ) / n print ( " MSE = ", mse ) # # Plot linear relationship y = c[0] * 1 + c[1] * x[0] # y_min = np.min ( data[:,0] ) y_max = np.max ( data[:,0] ) y_lin = np.linspace ( y_min, y_max, 51 ) c_lin = c[0] + c[1] * y_lin plt.clf ( ) plt.plot ( data[:,0], cost[:], 'bo', markersize = 8 ) plt.plot ( y_lin, c_lin, 'r-', markersize = 8, linewidth = 2 ) plt.grid ( True ) plt.xlabel ( '<--- Year --->' ) plt.ylabel ( '<--- Cost --->' ) plt.title ( 'Wheat Cost Data and Model', fontsize = 16 ) filename = 'exercise2_normalized_fitted.png' plt.savefig ( filename ) print ( ' Graphics saved as "%s"' % ( filename ) ) plt.show ( ) plt.clf ( ) return if ( __name__ == "__main__" ): exercise2 ( ) exercise2_normalized ( )