#! /usr/bin/env python3
#
def imdb ( ):

#*****************************************************************************80
#
## imdb() uses keras to classify movie reviews.
#
#  Discussion:
#
#    The IMDB movie review dataset is loaded, restricted to the word_num
#    most frequent words.  Each review is converted to a 0/1 vector of
#    length word_num, a small dense network is trained on 15,000 reviews
#    and validated on 10,000, and the trained model is then evaluated on
#    the test reviews.  All results are printed; nothing is returned.
#
#  Licensing:
#
#    This code is distributed under the MIT license.
#
#  Modified:
#
#    12 November 2019
#
#  Author:
#
#    Original version by Francois Chollet;
#    This version by John Burkardt.
#
#  Reference:
#
#    Francois Chollet,
#    Deep Learning with Python,
#    Manning, 2018,
#    ISBN: 9781617294433.
#
    import numpy as np
    import platform
    import tensorflow
    #
    #  Use the word_num most frequent words.
    #  (An alternative setting, word_num = 10000, was left commented out
    #  in the original script.)
    #
    word_num = 1000

    print ( '' )
    print ( 'imdb():' )
    print ( ' python version: ' + platform.python_version ( ) )
    print ( ' numpy version: ' + np.version.version )
    print ( ' tensorflow version: ' + tensorflow.__version__ )
    print ( '' )
    print ( ' Use a neural network to classify movie reviews as negative (0)' )
    print ( ' or positive (1) based on the usage of the ', word_num )
    print ( ' most common words.' )
    #
    #  Import the movie review dataset.
    #  The alias avoids shadowing this function's own name, imdb.
    #
    from tensorflow.keras.datasets import imdb as imdb_dataset
    #
    #  Load the movie review dataset.
    #
    ( train_data, train_labels ), ( test_data, test_labels ) = \
        imdb_dataset.load_data ( num_words = word_num )
    #
    #  Print a sample of the data and its label.
    #
    print ( '' )
    print ( ' Sample training data #0:' )
    print ( train_data[0] )
    print ( ' Label for sample training data #0:' )
    print ( train_labels[0] )
    #
    #  Verify that no word index exceeds word_num.
    #
    max_index = max ( max ( sequence ) for sequence in train_data )
    print ( '' )
    print ( ' Maximum index in training data is ', max_index )
    #
    #  Reformat the movie reviews as 0/1 vectors of length word_num.
    #  The dimension must be passed explicitly: the default of 10000 would
    #  produce vectors far wider than the word_num-word vocabulary loaded above.
    #
    x_train = vectorize_sequences ( train_data, dimension = word_num )
    x_test = vectorize_sequences ( test_data, dimension = word_num )
    #
    #  Print the sample of the data after conversion to binary vector.
    #
    print ( '' )
    print ( ' Sample training data #0 after vectorization:' )
    print ( x_train[0] )
    print ( ' Label for sample training data #0:' )
    print ( train_labels[0] )
    #
    #  Also convert label arrays to the proper numeric type.
    #
    y_train = np.asarray ( train_labels ).astype ( 'float32' )
    y_test = np.asarray ( test_labels ).astype ( 'float32' )
    #
    #  Define the model.
    #
    from tensorflow import keras
    from tensorflow.keras import layers
    #
    #  Describe the model: two hidden layers of 16 units, and a single
    #  sigmoid output unit.
    #
    model = keras.Sequential (
        [
            layers.Dense ( 16, activation = 'relu' ),
            layers.Dense ( 16, activation = 'relu' ),
            layers.Dense ( 1, activation = 'sigmoid' ),
        ]
    )
    #
    #  Compile the model.
    #
    model.compile (
        optimizer = 'rmsprop',
        loss = 'binary_crossentropy',
        metrics = ['accuracy'],
    )
    #
    #  Split the 25,000 data items into 10,000 validation and 15,000 training items.
    #
    x_val = x_train[:10000]
    partial_x_train = x_train[10000:]
    y_val = y_train[:10000]
    partial_y_train = y_train[10000:]
    #
    #  Use the model for training and validation.
    #
    history = model.fit (
        partial_x_train,
        partial_y_train,
        epochs = 20,
        batch_size = 512,
        validation_data = ( x_val, y_val ),
    )
    #
    #  Report the validation loss and accuracy from the last epoch.
    #
    val_acc = history.history['val_accuracy']
    val_loss = history.history['val_loss']

    print ( '' )
    print ( ' Model loss and accuracy on validation data:' )
    print ( ' Final validation loss', val_loss[-1] )
    print ( ' Final validation accuracy', val_acc[-1] )
    #
    #  Apply the model to new test data.
    #
    print ( '' )
    print ( ' Test the model:' )
    results = model.evaluate ( x_test, y_test )
    #
    #  Report the test loss and accuracy.
    #
    print ( '' )
    print ( ' Model loss and accuracy on test data:' )
    for metric_name, metric_value in zip ( model.metrics_names, results ):
        print ( metric_name, metric_value )
    #
    #  Demonstrate what the model predicts on some of the test data.
    #  The prediction array is flattened so that entry i is the value
    #  predicted for test review i.
    #
    y_predict = model.predict ( x_test ).flatten ( )

    print ( '' )
    print ( ' Model predictions on first 20 test movie reviews:' )
    print ( ' # Predict Actual' )
    for i in range ( 0, 20 ):
        print ( ' %2d %6.4f %d' % ( i, y_predict[i], test_labels[i] ) )
    #
    #  Terminate.
    #
    print ( '' )
    print ( 'imdb():' )
    print ( ' Normal end of execution.' )

def vectorize_sequences ( sequences, dimension = 10000 ):

#*****************************************************************************80
#
## vectorize_sequences() replaces each review by a vector.
#
#  Discussion:
#
#    This "vectorize" function replaces each numerically coded movie review
#    by a vector of length dimension (10,000 by default).  The j-th entry of
#    this vector is 1 if word index j occurs one or more times in the review,
#    and 0 otherwise.
#
#  Input:
#
#    sequences: a sequence of reviews, each a list of integer word indices;
#    every index must be a valid column index for a row of length dimension.
#
#    dimension: the length of each output vector.
#
#  Output:
#
#    results: a numpy array of shape ( len ( sequences ), dimension )
#    containing only the values 0.0 and 1.0.
#
    import numpy as np

    results = np.zeros ( ( len ( sequences ), dimension ) )

    for i, sequence in enumerate ( sequences ):
        #
        #  Mark every word index of review i in a single indexed assignment.
        #  The explicit integer dtype keeps an empty review valid as an index.
        #
        results[i, np.asarray ( sequence, dtype = np.intp )] = 1.0

    return results

def timestamp ( ):

#*****************************************************************************80
#
## timestamp() prints the date as a timestamp.
#
#  Licensing:
#
#    This code is distributed under the MIT license.
#
#  Modified:
#
#    21 August 2019
#
#  Author:
#
#    John Burkardt
#
    import time

    t = time.time ( )
    print ( time.ctime ( t ) )

if ( __name__ == '__main__' ):
    timestamp ( )
    imdb ( )
    timestamp ( )