#! /usr/bin/env python3
#
def imdb ( show_predictions = False ):

#*****************************************************************************80
#
## imdb uses keras to classify movie reviews.
#
#  Discussion:
#
#    The IMDB movie review dataset is loaded through keras, each review is
#    converted to a 0/1 vector over the word_num most frequent words, and
#    a small dense network (16 -> 16 -> 1) is trained as a binary classifier.
#    The final validation loss/accuracy and the test loss/accuracy are
#    printed.
#
#  Licensing:
#
#    This code is distributed under the GNU LGPL license.
#
#  Modified:
#
#    12 November 2019
#
#  Author:
#
#    Original version by Francois Chollet;
#    some modifications by John Burkardt.
#
#  Reference:
#
#    Francois Chollet,
#    Deep Learning with Python,
#    Manning, 2018,
#    ISBN: 9781617294433.
#
#  Input:
#
#    bool show_predictions: if True, also print the model's predictions
#    for the first 20 test reviews.  The default, False, matches the
#    original behavior, in which this demonstration was turned off.
#
    import keras
    import numpy as np
    import platform

    print ( '' )
    print ( 'imdb:' )
    print ( ' Python version: %s' % ( platform.python_version ( ) ) )
    print ( ' keras version: %s' % ( keras.__version__ ) )
    print ( ' Neural network to classify movie reviews.' )
#
#  Import the movie review dataset.
#  The module is aliased so that it does not shadow this function's name.
#
    from keras.datasets import imdb as imdb_dataset
#
#  Load the movie review dataset, using the 10,000 most frequent words.
#
    word_num = 10000

    ( train_data, train_labels ), ( test_data, test_labels ) = \
        imdb_dataset.load_data ( num_words = word_num )
#
#  Reformat the movie reviews as 0/1 vectors of length word_num.
#  The dimension is passed explicitly so that the vectors always agree
#  with the vocabulary size and with the input shape of the model.
#
    x_train = vectorize_sequences ( train_data, dimension = word_num )
    x_test = vectorize_sequences ( test_data, dimension = word_num )
#
#  Convert label arrays to the proper numeric type.
#
    y_train = np.asarray ( train_labels ).astype ( 'float32' )
    y_test = np.asarray ( test_labels ).astype ( 'float32' )
#
#  Split the 25,000 data items into 10,000 validation and 15,000 training
#  items.
#
    val_num = 10000

    x_val = x_train[:val_num]
    partial_x_train = x_train[val_num:]
    y_val = y_train[:val_num]
    partial_y_train = y_train[val_num:]
#
#  Describe the model.
#
    from keras import models
    from keras import layers

    model = models.Sequential ( )
    model.add ( layers.Dense ( 16, activation = 'relu', \
        input_shape = ( word_num, ) ) )
    model.add ( layers.Dense ( 16, activation = 'relu' ) )
    model.add ( layers.Dense ( 1, activation = 'sigmoid' ) )
#
#  Compile the model: choose the optimizer, loss function and metrics.
#
    model.compile ( optimizer = 'rmsprop', loss = 'binary_crossentropy', \
        metrics = [ 'accuracy' ] )
#
#  Use the model for training and validation.
#
    history = model.fit ( partial_x_train, partial_y_train, epochs = 20, \
        batch_size = 512, validation_data = ( x_val, y_val ) )
#
#  Report the validation loss and accuracy after the final epoch.
#
    val_acc = history.history['val_accuracy']
    val_loss = history.history['val_loss']

    print ( '' )
    print ( ' Model loss and accuracy on validation data:' )
    print ( ' Final validation loss', val_loss[-1] )
    print ( ' Final validation accuracy', val_acc[-1] )
#
#  Apply the model to new test data.
#
    print ( '' )
    print ( ' Test the model:' )
    results = model.evaluate ( x_test, y_test )
#
#  Report the test loss and accuracy.
#
    print ( '' )
    print ( ' Model loss and accuracy on test data:' )
    for metric_name, metric_value in zip ( model.metrics_names, results ):
        print ( metric_name, metric_value )
#
#  Optionally demonstrate what the model predicts on some of the test data.
#
    if ( show_predictions ):
        x_predict = model.predict ( x_test )
        x_predict = np.ndarray.flatten ( x_predict )
        print ( '' )
        print ( ' Model predictions on first 20 test movie reviews:' )
        print ( ' # Predict Actual' )
        for i in range ( 0, 20 ):
            print ( ' %2d %6.4f %d' % ( i, x_predict[i], test_labels[i] ) )
#
#  Terminate.
#
    print ( '' )
    print ( 'imdb:' )
    print ( ' Normal end of execution.' )


def vectorize_sequences ( sequences, dimension = 10000 ):

#*****************************************************************************80
#
## vectorize_sequences converts index sequences to 0/1 indicator vectors.
#
#  Discussion:
#
#    This "vectorize" function replaces each numerically coded movie review
#    by a vector of length dimension.  The i-th entry of this vector is 1 if
#    the i-th word occurs one or more times in the review, and 0 otherwise.
#
#  Input:
#
#    sequences: a sequence of index sequences.  Every index must be a valid
#    column index, that is, less than dimension.
#
#    integer dimension: the length of each output vector.
#
#  Output:
#
#    real results(len(sequences),dimension): the 0/1 indicator matrix,
#    with one row per input sequence.
#
    import numpy as np

    results = np.zeros ( ( len ( sequences ), dimension ) )
#
#  Indexing a row with the whole index list sets every listed column at
#  once; repeated indices simply set the same entry to 1 again.
#
    for i, sequence in enumerate ( sequences ):
        results[i, sequence] = 1.0

    return results


if ( __name__ == '__main__' ):
    imdb ( )