#! /usr/bin/env python3
#
def movie_review ( ):

#*****************************************************************************80
#
## movie_review() uses keras to classify movie reviews.
#
#  Discussion:
#
#    This code no longer runs properly, because of newer versions of
#    keras() and tensorflow()!
#
#  Licensing:
#
#    This code is distributed under the MIT license.
#
#  Modified:
#
#    01 January 2025
#
#  Author:
#
#    Original version by Francois Chollet;
#    This version by John Burkardt.
#
#  Reference:
#
#    Francois Chollet,
#    Deep Learning with Python,
#    Second Edition,
#    Manning, 2021,
#    ISBN: 9781617296864.
#
  import keras
  import numpy as np
  import platform
  import tensorflow

  print ( '' )
  print ( 'movie_review():' )
  print ( '  python version: ' + platform.python_version ( ) )
  print ( '  numpy version: ' + np.version.version )
  print ( '  tensorflow version: ' + tensorflow.__version__ )
  print ( '  keras version: ' + keras.__version__ )
  print ( '  Neural network to classify movie reviews.' )
#
#  Import the movie review dataset.
#
  from tensorflow.keras.datasets import imdb
#
#  Load the movie review dataset.
#  Only take the 5,000 most frequent words.
#
  ( train_data, train_labels ), ( test_data, test_labels ) = \
    imdb.load_data ( num_words = 5000 )
#
#  As an example, list the first data item and its label.
#  Label 0 means a negative review, and 1 means a positive review.
#
  print ( '' )
  print ( '  First data item and its label:' )
  print ( train_data[0] )
  print ( train_labels[0] )
#
#  Verify that the maximum word index occurring in any training
#  sequence is 4,999.
#
  print ( '' )
  print ( '  Check that the maximum word index is 4,999:' )
  print ( '  max word index = %d' \
    % ( max ( [ max ( sequence ) for sequence in train_data ] ) ) )
#
#  Just for illustration, we show how to recover the actual words of a review
#  from the word index.
#
#  word_index is a dictionary mapping words to an integer index.
#
  word_index = imdb.get_word_index ( )
#
#  We create a reverse dictionary, mapping integer indices to words.
#
  reverse_word_index = dict ( \
    [ ( value, key ) for ( key, value ) in word_index.items ( ) ] )
#
#  We decode a review; note that our indices were offset by 3,
#  because 0, 1 and 2 are reserved indices for "padding",
#  "start of sequence", and "unknown".
#
  decoded_review = ' '.join ( \
    [ reverse_word_index.get ( i - 3, '?' ) for i in train_data[0] ] )

  print ( '' )
  print ( '  Print a decoded review:' )
  print ( decoded_review )
#
#  vectorize_sequences() creates an all-zero matrix of shape
#  (len(sequences), dimension), then sets entry (i,j) to 1 wherever
#  word index j occurs in sequence i.
#
  def vectorize_sequences ( sequences, dimension = 5000 ):
    results = np.zeros ( ( len ( sequences ), dimension ) )
    for i, sequence in enumerate ( sequences ):
      results[i, sequence] = 1.0
    return results
#
#  Our vectorized training data.
#
  x_train = vectorize_sequences ( train_data )
#
#  Our vectorized test data.
#
  x_test = vectorize_sequences ( test_data )

  print ( '' )
  print ( '  First item of training data, after vectorization:' )
  print ( x_train[0] )
#
#  Our vectorized labels.
#
  y_train = np.asarray ( train_labels ).astype ( 'float32' )
  y_test = np.asarray ( test_labels ).astype ( 'float32' )
#
#  Now the data is ready to be fed into a neural network.
#
  from tensorflow import keras
  from tensorflow.keras import layers

  model = keras.Sequential ( [ \
    layers.Dense ( 16, activation = 'relu' ), \
    layers.Dense ( 16, activation = 'relu' ), \
    layers.Dense ( 1, activation = 'sigmoid' ) \
    ] )

  model.compile ( \
    optimizer = 'rmsprop', \
    loss = 'binary_crossentropy', \
    metrics = [ 'accuracy' ] )
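#
#  As an optional, illustrative check (not required for training, and an
#  editorial sketch rather than part of the original workflow), we can build
#  the model on the known input width of 5000 and ask keras to summarize the
#  layer shapes and parameter counts.
#
  model.build ( input_shape = ( None, 5000 ) )
  model.summary ( )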
#
#  Set aside a validation set.
#
  x_val = x_train[:5000]
  partial_x_train = x_train[5000:]

  y_val = y_train[:5000]
  partial_y_train = y_train[5000:]
#
#  Now we train the model for 20 epochs in batches of 512.
#
  history = model.fit ( \
    partial_x_train, partial_y_train, \
    epochs = 20, \
    batch_size = 512, \
    validation_data = ( x_val, y_val ) )
#
#  Let's examine the history dictionary.
#
  history_dict = history.history
  print ( history_dict.keys ( ) )
#
#  Plot the training and validation loss.
#
  import matplotlib.pyplot as plt

  loss_values = history_dict['loss']
  val_loss_values = history_dict['val_loss']
  epochs = range ( 1, len ( loss_values ) + 1 )

  plt.plot ( epochs, loss_values, 'bo', label = 'Training loss' )
  plt.plot ( epochs, val_loss_values, 'b', label = 'Validation loss' )
  plt.grid ( True )
  plt.title ( 'Training and validation loss' )
  plt.xlabel ( 'Epochs' )
  plt.ylabel ( 'Loss' )
  plt.legend ( )
  filename = 'movie_review_loss.png'
  plt.savefig ( filename )
  print ( '' )
  print ( '  Graphics saved as "' + filename + '"' )
  plt.show ( )
#
#  Plot the training and validation accuracy.
#
  acc = history_dict['accuracy']
  val_acc = history_dict['val_accuracy']

  plt.clf ( )
  plt.plot ( epochs, acc, 'bo', label = 'Training accuracy' )
  plt.plot ( epochs, val_acc, 'b', label = 'Validation accuracy' )
  plt.title ( 'Training and validation accuracy' )
  plt.grid ( True )
  plt.xlabel ( 'Epochs' )
  plt.ylabel ( 'Accuracy' )
  plt.legend ( )
  filename = 'movie_review_accuracy.png'
  plt.savefig ( filename )
  print ( '' )
  print ( '  Graphics saved as "' + filename + '"' )
  plt.show ( )
#
#  From the results, it looks like we should avoid overfitting by
#  stopping after 4 epochs.
#
  model = keras.Sequential ( [
    layers.Dense ( 16, activation = 'relu' ),
    layers.Dense ( 16, activation = 'relu' ),
    layers.Dense ( 1, activation = 'sigmoid' )
    ] )

  model.compile ( \
    optimizer = 'rmsprop', \
    loss = 'binary_crossentropy', \
    metrics = [ 'accuracy' ] )

  model.fit ( x_train, y_train, epochs = 4, batch_size = 512 )
#
#  Now run the test data.
#
  print ( '' )
  print ( '  Test the model:' )
  results = model.evaluate ( x_test, y_test )

  print ( '' )
  print ( '  Model loss and accuracy on test data:' )
  for i in range ( len ( model.metrics_names ) ):
    print ( model.metrics_names[i], results[i] )

  print ( '' )
  print ( '  Model predictions on test data:' )
  print ( '' )
  x_predict = model.predict ( x_test )
  print ( x_predict )
#
#  Terminate.
#
  print ( '' )
  print ( 'movie_review():' )
  print ( '  Normal end of execution.' )
  return

def timestamp ( ):

#*****************************************************************************80
#
## timestamp() prints the date as a timestamp.
#
#  Licensing:
#
#    This code is distributed under the MIT license.
#
#  Modified:
#
#    06 April 2013
#
#  Author:
#
#    John Burkardt
#
  import time

  t = time.time ( )
  print ( time.ctime ( t ) )

  return None

if ( __name__ == '__main__' ):
  timestamp ( )
  movie_review ( )
  timestamp ( )
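def vectorize_demo ( ):

#*****************************************************************************80
#
## vectorize_demo() illustrates the multi-hot encoding used by movie_review().
#
#  Discussion:
#
#    This helper is an illustrative sketch and is not called by
#    movie_review(); the name vectorize_demo() is introduced only for this
#    example.  It applies the same encoding as vectorize_sequences(), but
#    with dimension 10 instead of 5000, so the resulting 0/1 matrix can be
#    read at a glance: row i has a 1 in column j exactly when word index j
#    occurs in sequence i.
#
  import numpy as np

  sequences = [ [ 1, 3, 3, 7 ], [ 0, 2, 9 ] ]
  dimension = 10

  results = np.zeros ( ( len ( sequences ), dimension ) )
  for i, sequence in enumerate ( sequences ):
    results[i, sequence] = 1.0

  print ( '' )
  print ( 'vectorize_demo():' )
  print ( results )

  return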