#! /usr/bin/env python3
#
def movie_review ( ):

#*****************************************************************************80
#
## movie_review uses keras to classify movie reviews.
#
#  Licensing:
#
#    This code is distributed under the MIT license.
#
#  Modified:
#
#    18 April 2019
#
#  Author:
#
#    Original version by Francois Chollet;
#    some modifications by John Burkardt.
#
#  Reference:
#
#    Francois Chollet,
#    Deep Learning with Python,
#    Manning, 2018,
#    ISBN: 9781617294433.
#
  import keras
  import platform

  print ( '' )
  print ( 'movie_review:' )
  print ( '  Python version: %s' % ( platform.python_version ( ) ) )
  print ( '  keras version: %s' % ( keras.__version__ ) )
  print ( '  Neural network to classify movie reviews.' )
#
#  Import the movie review dataset.
#
  from keras.datasets import imdb
#
#  Load the movie review dataset.
#  Only keep the 10,000 most frequent words.
#
  ( train_data, train_labels ), ( test_data, test_labels ) = \
    imdb.load_data ( num_words = 10000 )
#
#  As an example, list the first data item and its label.
#  A label of 0 means a negative review, and 1 means a positive review.
#
  print ( '' )
  print ( '  First data item and its label:' )
  print ( train_data[0] )
  print ( train_labels[0] )
#
#  Verify that the maximum word index occurring in any training sequence
#  is 9,999.
#
  print ( '' )
  print ( '  Check that the maximum word index is 9,999:' )
  print ( '  max word index = %d' % \
    ( max ( [ max ( sequence ) for sequence in train_data ] ) ) )
#
#  Just for illustration, show how to recover the actual words of a review.
#
#  word_index is a dictionary mapping words to an integer index.
#
  word_index = imdb.get_word_index ( )
#
#  Create a reverse dictionary, mapping integer indices to words.
#
  reverse_word_index = dict ( \
    [ ( value, key ) for ( key, value ) in word_index.items ( ) ] )
#
#  Decode the review.  The indices are offset by 3, because 0, 1 and 2
#  are reserved for "padding", "start of sequence", and "unknown".
#
  decoded_review = ' '.join ( \
    [ reverse_word_index.get ( i - 3, '?' ) for i in train_data[0] ] )

  print ( '' )
  print ( '  Print a decoded review:' )
  print ( decoded_review )

  import numpy as np
#
#  vectorize_sequences() multi-hot encodes the word-index sequences:
#  it creates an all-zero matrix of shape (len(sequences), dimension),
#  then sets entry (i,j) to 1 wherever word index j occurs in sequence i.
#
  def vectorize_sequences ( sequences, dimension = 10000 ):
    results = np.zeros ( ( len ( sequences ), dimension ) )
    for i, sequence in enumerate ( sequences ):
      results[i, sequence] = 1.0  # set specific indices of results[i] to 1s
    return results
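#
#  As an added illustration (not in the original code), multi-hot encoding
#  the toy sequence [1,3] with dimension 5 should produce [0,1,0,1,0]:
#
  print ( '' )
  print ( '  Multi-hot encoding of [ [ 1, 3 ] ] with dimension 5:' )
  print ( vectorize_sequences ( [ [ 1, 3 ] ], dimension = 5 ) )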
#
#  Our vectorized training data.
#
  x_train = vectorize_sequences ( train_data )
#
#  Our vectorized test data.
#
  x_test = vectorize_sequences ( test_data )
#
#  Our vectorized labels.
#
  y_train = np.asarray ( train_labels ).astype ( 'float32' )
  y_test = np.asarray ( test_labels ).astype ( 'float32' )
#
#  Define a network of two 16-unit relu layers, followed by a 1-unit
#  sigmoid layer that outputs the probability of a positive review.
#
  from keras import models
  from keras import layers

  model = models.Sequential ( )
  model.add ( layers.Dense ( 16, activation = 'relu', input_shape = ( 10000, ) ) )
  model.add ( layers.Dense ( 16, activation = 'relu' ) )
  model.add ( layers.Dense ( 1, activation = 'sigmoid' ) )

  model.compile ( optimizer = 'rmsprop', \
                  loss = 'binary_crossentropy', \
                  metrics = [ 'accuracy' ] )
#
#  Equivalently, the optimizer, loss and metric can be passed as objects
#  instead of strings.  Each compile() call below replaces the previous
#  configuration.
#
  from keras import optimizers

  model.compile ( optimizer = optimizers.RMSprop ( lr = 0.001 ), \
                  loss = 'binary_crossentropy', \
                  metrics = [ 'accuracy' ] )

  from keras import losses
  from keras import metrics

  model.compile ( optimizer = optimizers.RMSprop ( lr = 0.001 ), \
                  loss = losses.binary_crossentropy, \
                  metrics = [ metrics.binary_accuracy ] )
#
#  Set aside the first 10,000 training items as a validation set.
#
  x_val = x_train[:10000]
  partial_x_train = x_train[10000:]

  y_val = y_train[:10000]
  partial_y_train = y_train[10000:]
#
#  Train for 20 epochs, monitoring loss and accuracy on the validation set.
#
  history = model.fit ( partial_x_train, partial_y_train, epochs = 20, \
    batch_size = 512, validation_data = ( x_val, y_val ) )

  history_dict = history.history
  print ( history_dict.keys ( ) )

  import matplotlib.pyplot as plt

  acc = history.history['binary_accuracy']
  val_acc = history.history['val_binary_accuracy']
  loss = history.history['loss']
  val_loss = history.history['val_loss']

  epochs = range ( 1, len ( acc ) + 1 )
#
#  "bo" plots blue dots.
#
  plt.plot ( epochs, loss, 'bo', label = 'Training loss' )
#
#  "b" plots a solid blue line.
#
  plt.plot ( epochs, val_loss, 'b', label = 'Validation loss' )
  plt.title ( 'Training and validation loss' )
  plt.xlabel ( 'Epochs' )
  plt.ylabel ( 'Loss' )
  plt.legend ( )
  filename = 'movie_review_loss.png'
  plt.savefig ( filename )
  print ( '' )
  print ( '  Graphics saved as "%s"' % ( filename ) )
  plt.show ( )
#
#  Clear, to prepare another figure.
#
  plt.clf ( )

  plt.plot ( epochs, acc, 'bo', label = 'Training acc' )
  plt.plot ( epochs, val_acc, 'b', label = 'Validation acc' )
  plt.title ( 'Training and validation accuracy' )
  plt.xlabel ( 'Epochs' )
  plt.ylabel ( 'Accuracy' )
  plt.legend ( )
  filename = 'movie_review_accuracy.png'
  plt.savefig ( filename )
  print ( '' )
  print ( '  Graphics saved as "%s"' % ( filename ) )
  plt.show ( )
#
#  The validation curves indicate overfitting after about 4 epochs, so
#  build a fresh model and train it for just 4 epochs.
#
  model = models.Sequential ( )
  model.add ( layers.Dense ( 16, activation = 'relu', input_shape = ( 10000, ) ) )
  model.add ( layers.Dense ( 16, activation = 'relu' ) )
  model.add ( layers.Dense ( 1, activation = 'sigmoid' ) )

  model.compile ( \
    optimizer = 'rmsprop', \
    loss = 'binary_crossentropy', \
    metrics = [ 'accuracy' ] )

  model.fit ( x_train, y_train, epochs = 4, batch_size = 512 )

  print ( '' )
  print ( '  Test the model:' )

  results = model.evaluate ( x_test, y_test )

  print ( '' )
  print ( '  Model loss and accuracy on test data:' )
  for i in range ( len ( model.metrics_names ) ):
    print ( model.metrics_names[i], results[i] )

  print ( '' )
  print ( '  Model predictions on test data:' )
  print ( '' )
  x_predict = model.predict ( x_test )
  print ( x_predict )
#
#  Terminate.
#
  print ( '' )
  print ( 'movie_review:' )
  print ( '  Normal end of execution.' )
  return
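#
#  The following helper is an added illustrative sketch, not part of the
#  original Chollet/Burkardt code.  It shows how a trained model could score
#  one raw review, assuming the same preprocessing used above: word_index
#  lookup, a +3 offset for the reserved indices, a 10,000-word vocabulary,
#  multi-hot encoding, and a sigmoid output thresholded at 0.5.  The model
#  and word_index would have to be passed in, since they are local to
#  movie_review(); for example:
#
#    label, p = predict_review ( model, imdb.get_word_index ( ), \
#      'this movie was excellent' )
#
def predict_review ( model, word_index, text ):

#*****************************************************************************80
#
## predict_review scores one raw text review with a trained model (sketch).
#
  import numpy as np
#
#  Map each word to its stored index; unknown words get -1 + 3 = 2, which
#  the range filter discards along with out-of-vocabulary indices.
#
  indices = [ word_index.get ( word, -1 ) + 3 for word in text.lower ( ).split ( ) ]
  indices = [ i for i in indices if 3 <= i and i < 10000 ]
#
#  Multi-hot encode the review and threshold the sigmoid output at 0.5.
#
  x = np.zeros ( ( 1, 10000 ) )
  x[0,indices] = 1.0
  p = model.predict ( x )[0,0]
  label = 1 if 0.5 <= p else 0

  return label, p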
def timestamp ( ):

#*****************************************************************************80
#
## TIMESTAMP prints the date as a timestamp.
#
#  Licensing:
#
#    This code is distributed under the MIT license.
#
#  Modified:
#
#    06 April 2013
#
#  Author:
#
#    John Burkardt
#
#  Parameters:
#
#    None
#
  import time

  t = time.time ( )
  print ( time.ctime ( t ) )

  return None

if ( __name__ == '__main__' ):
  timestamp ( )
  movie_review ( )
  timestamp ( )