#! /usr/bin/env python3
#
import numpy as np

def ford_best ( ):

#*****************************************************************************80
#
## ford_best uses gradient descent to model Ford data.
#
#  Discussion:
#
#    Gradient descent is used to find the (b,m) for which the linear model
#      y = b + m * x
#    best fits the (x,y) data.
#
#  Licensing:
#
#    This code is distributed under the GNU LGPL license.
#
#  Modified:
#
#    24 September 2019
#
#  Author:
#
#    John Burkardt
#
  import matplotlib.pyplot as plt
  import platform
  from gradient_descent2 import gradient_descent2

  print ( '' )
  print ( 'ford_best:' )
  print ( '  Python version: %s' % ( platform.python_version ( ) ) )
  print ( '  Given mileage x and price y for used Fords,' )
  print ( '  seek (m,b) so that y=b+mx approximates the data.' )
  print ( '  Use gradient descent to estimate best b and m.' )
#
#  Choose a starting guess for b and m.
#
  b = 0.5
  m = 0.0
  bm0 = np.array ( [ b, m ] )
#
#  Choose a small learning rate.
#
  r = 0.01
#
#  Choose tolerances for the size of dx and df, and limit the iterations.
#
  dxtol = 0.001
  dftol = 0.001
  itmax = 1000

  bm, it = gradient_descent2 ( ford_f, ford_df, bm0, r, dxtol, dftol, itmax )
#
#  Report the results.
#
  b = bm[0]
  m = bm[1]

  print ( '' )
  print ( '  Estimating model parameters:' )
  print ( '  x = normalized mileage' )
  print ( '  y = normalized price' )
  print ( '  Seek relationship y = b + m * x' )
  print ( '  b = %g' % ( b ) )
  print ( '  m = %g' % ( m ) )
  print ( '  Sum of squares of initial error is %g' % ( ford_f ( bm0 ) ) )
  print ( '  Sum of squares of final error is %g' % ( ford_f ( bm ) ) )
#
#  As an extra, plot the data, and the approximating line.
#
  x, y = ford_data ( )
  plt.plot ( x, y, 'ro', markersize = 10 )
  x2 = np.array ( [ 0.0, 1.0 ] )
  y2 = b + m * x2
  plt.plot ( x2, y2, linewidth = 3 )
  plt.grid ( True )
  plt.xlabel ( '<--- Normalized mileage --->' )
  plt.ylabel ( '<--- Normalized price --->' )
  plt.title ( 'Ford Escort Price as Function of Mileage', fontsize = 16 )
  filename = 'ford_formula.png'
  plt.savefig ( filename )
  print ( '' )
  print ( '  Graphics saved in "%s"' % ( filename ) )
  plt.show ( )
  plt.clf ( )
#
#  Fix b and vary m:
#
  n = len ( x )
  m2 = np.linspace ( m - 1, m + 1, 101 )
  e2 = np.zeros ( 101 )
  for i in range ( 0, 101 ):
    e2[i] = 0.0
    for j in range ( 0, n ):
      e2[i] = e2[i] + ( y[j] - b - m2[i] * x[j] )**2
  plt.plot ( m2, e2, linewidth = 3 )
  plt.plot ( [ m - 1, m + 1 ], [ 0, 0 ], 'k-', linewidth = 3 )
  plt.plot ( m, e2[50], 'r.', markersize = 20 )
  plt.grid ( True )
  plt.xlabel ( '<-- Value of m -->' )
  plt.ylabel ( '<-- Least squares error E -->' )
  plt.title ( 'Least squares error as we vary m' )
  filename = 'ford_best_m.png'
  plt.savefig ( filename )
  print ( '' )
  print ( '  Graphics saved in "%s"' % ( filename ) )
  plt.show ( )
  plt.clf ( )
#
#  Fix m and vary b:
#
  b2 = np.linspace ( b - 1, b + 1, 101 )
  e2 = np.zeros ( 101 )
  for i in range ( 0, 101 ):
    e2[i] = 0.0
    for j in range ( 0, n ):
      e2[i] = e2[i] + ( y[j] - b2[i] - m * x[j] )**2
  plt.plot ( b2, e2, linewidth = 3 )
  plt.plot ( [ b - 1, b + 1 ], [ 0, 0 ], 'k-', linewidth = 3 )
  plt.plot ( b, e2[50], 'r.', markersize = 20 )
  plt.grid ( True )
  plt.xlabel ( '<-- Value of b -->' )
  plt.ylabel ( '<-- Least squares error E -->' )
  plt.title ( 'Least squares error as we vary b' )
  filename = 'ford_best_b.png'
  plt.savefig ( filename )
  print ( '' )
  print ( '  Graphics saved in "%s"' % ( filename ) )
  plt.show ( )
  plt.clf ( )
#
#  Terminate.
#
  print ( '' )
  print ( 'ford_best:' )
  print ( '  Normal end of execution.' )
  return
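def gradient_descent2_sketch ( f, df, x0, r, dxtol, dftol, itmax ):

#*****************************************************************************80
#
## gradient_descent2_sketch is an added illustration, NOT the imported code.
#
#  Discussion:
#
#    The actual minimizer comes from the gradient_descent2 module, which is
#    not shown in this file.  This sketch only suggests what such a function
#    might look like, assuming, from the call in ford_best, that it takes a
#    cost function f, its gradient df, a starting point x0, a learning rate
#    r, tolerances dxtol and dftol on the sizes of the step and the gradient,
#    and an iteration limit itmax, and returns the final point and the
#    number of iterations taken.
#
  x = x0.copy ( )
  it = 0
  for it in range ( 1, itmax + 1 ):
#
#  Step downhill, against the gradient, scaled by the learning rate.
#
    g = df ( x )
    dx = - r * g
    x = x + dx
#
#  Stop early once the step or the gradient has become small.
#
    if ( np.linalg.norm ( dx ) < dxtol or np.linalg.norm ( g ) < dftol ):
      break

  return x, it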
def ford_f ( bm ):

#*****************************************************************************80
#
## ford_f evaluates the sum of squares of the residual.
#
#  Discussion:
#
#    For each data item (x,y), we estimate y = b + m * x.
#
#    Our residual error is
#      value = sum ( y - b - m * x )^2
#
#  Input:
#
#    real bm[2]: the values of b and m.
#
#  Output:
#
#    real f: the total residual, the sum of
#      ( y[i] - b - m * x[i] )**2
#    for 0 <= i < n.
#
  b = bm[0]
  m = bm[1]
  x, y = ford_data ( )
  f = np.sum ( ( y - b - m * x )**2 )

  return f

def ford_df ( bm ):

#*****************************************************************************80
#
## ford_df evaluates the gradient of the sum of squares error.
#
#  Discussion:
#
#    For each data item (x,y), we estimate y = b + m * x.
#
#    Our residual error is
#      value = sum ( y - b - m * x )^2
#
#    Our partial derivatives are:
#      dE/db = sum ( - 2.0 * ( y - b - m * x ) )
#      dE/dm = sum ( - 2.0 * ( y - b - m * x ) * x )
#
#  Input:
#
#    real bm[2]: the values of b and m.
#
#  Output:
#
#    real df[2]: the gradient vector [dfdb, dfdm], the
#    derivatives of the error function with respect to b and to m.
#
  b = bm[0]
  m = bm[1]
  x, y = ford_data ( )
  dfdb = - 2.0 * np.sum ( ( y - b - m * x ) )
  dfdm = - 2.0 * np.sum ( ( y - b - m * x ) * x )
  df = np.array ( [ dfdb, dfdm ] )

  return df

def ford_data ( ):

#*****************************************************************************80
#
## ford_data returns a normalized copy of the Ford data.
#
#  Discussion:
#
#    Here, the data is read from a file:
#
#      data = np.loadtxt ( 'ford_data.txt' )
#
#    It might be more efficient for it to be stored internally as an array:
#
#      data = np.array ( [
#        [ 27,  9991 ],
#        [ 17,  9925 ],
#        [ 28, 10491 ],
#        ... .....
#        [  7, 11000 ] ] )
#
#  Output:
#
#    real x[23], y[23]: the mileage and asking price for used Ford Escorts.
#
  data = np.loadtxt ( 'ford_data.txt' )
  x = data[:,0]
  y = data[:,1]
#
#  Scale x and y to the interval [0,1].
#
  x = ( x - np.min ( x ) ) / ( np.max ( x ) - np.min ( x ) )
  y = ( y - np.min ( y ) ) / ( np.max ( y ) - np.min ( y ) )

  return x, y

if ( __name__ == '__main__' ):
  ford_best ( )
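def ford_exact ( ):

#*****************************************************************************80
#
## ford_exact is an added sketch that cross-checks the fit directly.
#
#  Discussion:
#
#    This function is not part of the original program.  It is a small
#    illustration of the same least squares problem solved exactly, using
#    np.polyfit of degree 1, so that the (b,m) found by gradient descent
#    can be compared against the exact minimizer.  For example, after
#    running ford_best, one might call:
#
#      b, m = ford_exact ( )
#
#  Output:
#
#    real b, m: the exact least squares intercept and slope.
#
  x, y = ford_data ( )
#
#  np.polyfit returns the coefficients highest degree first: [ m, b ].
#
  m, b = np.polyfit ( x, y, 1 )

  print ( '' )
  print ( 'ford_exact:' )
  print ( '  Exact least squares fit: b = %g, m = %g' % ( b, m ) )

  return b, m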