#! /usr/bin/env python3
#
import numpy as np

def _truck_distances ( x, y, c ):

#*****************************************************************************80
#
## _truck_distances computes the distance from every truck to every house.
#
#  Input:
#
#    real x(n), y(n): the house coordinates.
#
#    real c(k,2): the truck coordinates, one truck per row.
#
#  Output:
#
#    real d(k,n): d[j,i] is the Euclidean distance from truck j to house i.
#
  dx = x[np.newaxis,:] - c[:,0,np.newaxis]
  dy = y[np.newaxis,:] - c[:,1,np.newaxis]

  return np.sqrt ( dx**2 + dy**2 )

def _cluster_cost ( d, label ):

#*****************************************************************************80
#
## _cluster_cost sums the distance between each house and its own truck.
#
#  Discussion:
#
#    A house whose label matches no truck index contributes nothing.
#
#  Input:
#
#    real d(k,n): the truck-to-house distances.
#
#    label(n): the index of the truck serving each house.
#
#  Output:
#
#    real cost: the total distance.
#
  cost = 0.0
  for k in range ( d.shape[0] ):
    cost = cost + np.sum ( d[k,label == k] )

  return cost

def _move_trucks ( x, y, c, label ):

#*****************************************************************************80
#
## _move_trucks moves each truck to the centroid of the houses it serves.
#
#  Discussion:
#
#    The array c is updated in place.
#
#    A truck that currently serves no houses is left where it is.  Taking
#    the mean of an empty cluster would set its position to NaN, and every
#    later distance comparison involving that truck would then fail.
#
#  Input:
#
#    real x(n), y(n): the house coordinates.
#
#    real c(k,2): the truck coordinates, one truck per row.
#
#    label(n): the index of the truck serving each house.
#
#  Output:
#
#    real c(k,2): the same array, holding the updated truck coordinates.
#
  for k in range ( c.shape[0] ):
    members = ( label == k )
    if ( np.any ( members ) ):
      c[k,0] = np.mean ( x[members] )
      c[k,1] = np.mean ( y[members] )

  return c

def _plot_step ( x, y, c, label, step, title ):

#*****************************************************************************80
#
## _plot_step draws the houses and trucks, then saves and shows the figure.
#
#  Discussion:
#
#    The figure is written to 'pizza_step<step>.png', shown, and cleared.
#
#    Truck 0 is red, truck 1 green, truck 2 blue.  If there are more
#    trucks than colors, the color list is reused cyclically.
#
#  Input:
#
#    real x(n), y(n): the house coordinates.
#
#    real c(k,2): the truck coordinates, one truck per row.
#
#    label(n): the index of the truck serving each house.
#
#    integer step: the step number, used in the output file name.
#
#    string title: the plot title.
#
#
#  matplotlib is imported here, rather than at file level, so that
#  pizza_kmeans ( plot = False ) does not need it at all.
#
  import matplotlib.pyplot as plt

  colors = 'rgbcmyk'
  truck_num = c.shape[0]
#
#  Draw all the houses first, then all the trucks.
#
  for k in range ( truck_num ):
    members = ( label == k )
    plt.plot ( x[members], y[members], colors[k % len ( colors )] + '.', \
      markersize = 20 )

  for k in range ( truck_num ):
    plt.plot ( c[k,0], c[k,1], colors[k % len ( colors )] + '*', \
      markersize = 25 )

  plt.xlabel ( '<-- X -->', fontsize = 16 )
  plt.ylabel ( '<-- Y -->', fontsize = 16 )
  plt.title ( title, fontsize = 16 )
  plt.grid ( True )
  filename = 'pizza_step%d.png' % ( step )
  plt.savefig ( filename )
  plt.show ( )
  plt.clf ( )

  return

def pizza_kmeans ( house_file = 'pizza_houses.txt', \
  truck_file = 'pizza_trucks.txt', step_num = 8, plot = True ):

#*****************************************************************************80
#
## pizza_kmeans does a simple clustering exercise.
#
#  Discussion:
#
#    There are several pizza trucks (three in the default data: red,
#    green, and blue).  Each house is served by exactly one truck.
#    The cost of this system is the total of the distance between each
#    house and its truck.
#
#    Using k-means, we seek to minimize this cost, by adjusting the
#    assignment of trucks to houses, and by adjusting the location of
#    the trucks.
#
#    Odd-numbered steps assign every house to its nearest truck and
#    print the cost.  Even-numbered steps move every truck to the
#    centroid of its houses.
#
#    A house that is equally close to several trucks is assigned to the
#    one with the lowest index, so no house is ever left unassigned.
#
#  Licensing:
#
#    This code is distributed under the GNU LGPL license.
#
#  Modified:
#
#    24 September 2019
#
#  Author:
#
#    John Burkardt
#
#  Input:
#
#    string house_file: a text file with one house per row, containing
#    x, y, and the index of the truck initially serving the house.
#
#    string truck_file: a text file with one truck per row, containing
#    x and y.
#
#    integer step_num: the number of steps to take after step 0.
#
#    logical plot: if True, each step is plotted, saved as
#    'pizza_step<step>.png', and shown.  If False, no plotting is done
#    and matplotlib is not needed.
#
  print ( '' )
  print ( 'pizza_kmeans:' )
  print ( ' MATH1900 Selected Topics in Mathematics: Machine Learning' )
#
#  Read the house data, x, y, truck.
#  ndmin = 2 keeps a one-row file two dimensional, so the column
#  indexing below still works.
#
  house = np.loadtxt ( house_file, ndmin = 2 )
  x = house[:,0]
  y = house[:,1]
  label = house[:,2]
  n = len ( x )
  print ( ' Number of houses is %d' % ( n ) )
#
#  Read the truck data.
#
  c = np.loadtxt ( truck_file, ndmin = 2 )
#
#  The cost is the sum of the distances between every house and its truck.
#
  d = _truck_distances ( x, y, c )
  cost = _cluster_cost ( d, label )
  print ( ' Cost of initial clustering is ', cost )
#
#  Step 0
#
  if ( plot ):
    _plot_step ( x, y, c, label, 0, 'Houses, Trucks, and Clusters, Step 0' )
#
#  Alternate between reassigning houses and moving trucks.
#
  for step in range ( 1, step_num + 1 ):

    print ( '' )

    if ( step % 2 == 1 ):
#
#  Odd step: Update houses to nearest truck.
#  argmin picks exactly one truck per house, even when distances tie.
#
      print ( ' Assign each house to nearest truck.' )
      d = _truck_distances ( x, y, c )
      label = np.argmin ( d, axis = 0 )
      cost = _cluster_cost ( d, label )
      print ( ' Cost is ', cost )
      title = 'Assign houses to nearest truck, Step %d' % ( step )

    else:
#
#  Even step: Move trucks to center of clusters.
#
      print ( ' Move each truck to cluster center.' )
      _move_trucks ( x, y, c, label )
      title = 'Move Trucks to Centers, Step %d' % ( step )

    if ( plot ):
      _plot_step ( x, y, c, label, step, title )
#
#  Terminate.
#
  print ( '' )
  print ( 'pizza_kmeans:' )
  print ( ' Normal end of execution.' )

  return

if ( __name__ == '__main__' ):
  pizza_kmeans ( )