#! /usr/bin/env python3
#
def movie_review ( ):

#*****************************************************************************80
#
## movie_review uses keras to classify movie reviews.
#
#  Licensing:
#
#    This code is distributed under the MIT license.
#
#  Modified:
#
#    18 April 2019
#
#  Author:
#
#    Original version by Francois Chollet;
#    some modifications by John Burkardt.
#
#  Reference:
#
#    Francois Chollet,
#    Deep Learning with Python,
#    Manning, 2018,
#    ISBN: 9781617294433.
#
  import keras
  import platform

  print ( '' )
  print ( 'movie_review:' )
  print ( '  Python version: %s' % ( platform.python_version ( ) ) )
  print ( '  keras version: %s' % ( keras.__version__ ) )
  print ( '  Neural network to classify movie reviews.' )
#
#  Import the movie review dataset.
#
  from keras.datasets import imdb
#
#  Load the movie review dataset.
#  Only keep the 10,000 most frequent words.
#
  ( train_data, train_labels ), ( test_data, test_labels ) = \
    imdb.load_data ( num_words = 10000 )
#
#  As an example, list the first data item and its label.
#  A label of 0 means a negative review, and 1 means a positive review.
#
  print ( '' )
  print ( '  First data item and its label:' )
  print ( train_data[0] )
  print ( train_labels[0] )
#
#  Verify that the maximum word index occurring in any training sequence
#  is 9,999.
#
  print ( '' )
  print ( '  Check that the maximum word index is 9,999:' )
  print ( '  max word index = %d' % \
    ( max ( [ max ( sequence ) for sequence in train_data ] ) ) )
#
#  Just for illustration, show how to recover the actual words of a review.
#
#  word_index is a dictionary mapping words to an integer index.
#
  word_index = imdb.get_word_index ( )
#
#  Create a reverse dictionary, mapping integer indices to words.
#
  reverse_word_index = dict ( \
    [ ( value, key ) for ( key, value ) in word_index.items ( ) ] )
#
#  Decode the review.  The indices are offset by 3, because 0, 1 and 2
#  are reserved for "padding", "start of sequence", and "unknown".
#
  decoded_review = ' '.join ( \
    [ reverse_word_index.get ( i - 3, '?' ) for i in train_data[0] ] )

  print ( '' )
  print ( '  Print a decoded review:' )
  print ( decoded_review )

  import numpy as np
#
#  vectorize_sequences() multi-hot encodes the word-index sequences:
#  it creates an all-zero matrix of shape (len(sequences), dimension),
#  then sets entry (i,j) to 1 wherever word index j occurs in sequence i.
#
  def vectorize_sequences ( sequences, dimension = 10000 ):
    results = np.zeros ( ( len ( sequences ), dimension ) )
    for i, sequence in enumerate ( sequences ):
      results[i, sequence] = 1.0  # set specific indices of results[i] to 1s
    return results
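#
#  As an added illustration (not in the original code), multi-hot encoding
#  the toy sequence [1,3] with dimension 5 should produce [0,1,0,1,0]:
#
  print ( '' )
  print ( '  Multi-hot encoding of [ [ 1, 3 ] ] with dimension 5:' )
  print ( vectorize_sequences ( [ [ 1, 3 ] ], dimension = 5 ) )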
#
#  Our vectorized training data.
#
  x_train = vectorize_sequences ( train_data )
#
#  Our vectorized test data.
#
  x_test = vectorize_sequences ( test_data )
#
#  Our vectorized labels.
#
  y_train = np.asarray ( train_labels ).astype ( 'float32' )
  y_test = np.asarray ( test_labels ).astype ( 'float32' )
#
#  Define a network of two 16-unit relu layers, followed by a 1-unit
#  sigmoid layer that outputs the probability of a positive review.
#
  from keras import models
  from keras import layers

  model = models.Sequential ( )
  model.add ( layers.Dense ( 16, activation = 'relu', input_shape = ( 10000, ) ) )
  model.add ( layers.Dense ( 16, activation = 'relu' ) )
  model.add ( layers.Dense ( 1, activation = 'sigmoid' ) )

  model.compile ( optimizer = 'rmsprop', \
                  loss = 'binary_crossentropy', \
                  metrics = [ 'accuracy' ] )
#
#  Equivalently, the optimizer, loss and metric can be passed as objects
#  instead of strings.  Each compile() call below replaces the previous
#  configuration.
#
  from keras import optimizers

  model.compile ( optimizer = optimizers.RMSprop ( lr = 0.001 ), \
                  loss = 'binary_crossentropy', \
                  metrics = [ 'accuracy' ] )

  from keras import losses
  from keras import metrics

  model.compile ( optimizer = optimizers.RMSprop ( lr = 0.001 ), \
                  loss = losses.binary_crossentropy, \
                  metrics = [ metrics.binary_accuracy ] )
#
#  Set aside the first 10,000 training items as a validation set.
#
  x_val = x_train[:10000]
  partial_x_train = x_train[10000:]

  y_val = y_train[:10000]
  partial_y_train = y_train[10000:]
#
#  Train for 20 epochs, monitoring loss and accuracy on the validation set.
#
  history = model.fit ( partial_x_train, partial_y_train, epochs = 20, \
    batch_size = 512, validation_data = ( x_val, y_val ) )

  history_dict = history.history
  print ( history_dict.keys ( ) )

  import matplotlib.pyplot as plt

  acc = history.history['binary_accuracy']
  val_acc = history.history['val_binary_accuracy']
  loss = history.history['loss']
  val_loss = history.history['val_loss']

  epochs = range ( 1, len ( acc ) + 1 )
#
#  "bo" plots blue dots.
#
  plt.plot ( epochs, loss, 'bo', label = 'Training loss' )
#
#  "b" plots a solid blue line.
#
  plt.plot ( epochs, val_loss, 'b', label = 'Validation loss' )
  plt.title ( 'Training and validation loss' )
  plt.xlabel ( 'Epochs' )
  plt.ylabel ( 'Loss' )
  plt.legend ( )
  filename = 'movie_review_loss.png'
  plt.savefig ( filename )
  print ( '' )
  print ( '  Graphics saved as "%s"' % ( filename ) )
  plt.show ( )
#
#  Clear, to prepare another figure.
#
  plt.clf ( )

  plt.plot ( epochs, acc, 'bo', label = 'Training acc' )
  plt.plot ( epochs, val_acc, 'b', label = 'Validation acc' )
  plt.title ( 'Training and validation accuracy' )
  plt.xlabel ( 'Epochs' )
  plt.ylabel ( 'Accuracy' )
  plt.legend ( )
  filename = 'movie_review_accuracy.png'
  plt.savefig ( filename )
  print ( '' )
  print ( '  Graphics saved as "%s"' % ( filename ) )
  plt.show ( )
#
#  The validation curves indicate overfitting after about 4 epochs, so
#  build a fresh model and train it for just 4 epochs.
#
  model = models.Sequential ( )
  model.add ( layers.Dense ( 16, activation = 'relu', input_shape = ( 10000, ) ) )
  model.add ( layers.Dense ( 16, activation = 'relu' ) )
  model.add ( layers.Dense ( 1, activation = 'sigmoid' ) )

  model.compile ( \
    optimizer = 'rmsprop', \
    loss = 'binary_crossentropy', \
    metrics = [ 'accuracy' ] )

  model.fit ( x_train, y_train, epochs = 4, batch_size = 512 )

  print ( '' )
  print ( '  Test the model:' )

  results = model.evaluate ( x_test, y_test )

  print ( '' )
  print ( '  Model loss and accuracy on test data:' )
  for i in range ( len ( model.metrics_names ) ):
    print ( model.metrics_names[i], results[i] )

  print ( '' )
  print ( '  Model predictions on test data:' )
  print ( '' )
  x_predict = model.predict ( x_test )
  print ( x_predict )
#
#  Terminate.
#
  print ( '' )
  print ( 'movie_review:' )
  print ( '  Normal end of execution.' )
  return
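#
#  The following helper is an added illustrative sketch, not part of the
#  original Chollet/Burkardt code.  It shows how a trained model could score
#  one raw review, assuming the same preprocessing used above: word_index
#  lookup, a +3 offset for the reserved indices, a 10,000-word vocabulary,
#  multi-hot encoding, and a sigmoid output thresholded at 0.5.  The model
#  and word_index would have to be passed in, since they are local to
#  movie_review(); for example:
#
#    label, p = predict_review ( model, imdb.get_word_index ( ), \
#      'this movie was excellent' )
#
def predict_review ( model, word_index, text ):

#*****************************************************************************80
#
## predict_review scores one raw text review with a trained model (sketch).
#
  import numpy as np
#
#  Map each word to its stored index; unknown words get -1 + 3 = 2, which
#  the range filter discards along with out-of-vocabulary indices.
#
  indices = [ word_index.get ( word, -1 ) + 3 for word in text.lower ( ).split ( ) ]
  indices = [ i for i in indices if 3 <= i and i < 10000 ]
#
#  Multi-hot encode the review and threshold the sigmoid output at 0.5.
#
  x = np.zeros ( ( 1, 10000 ) )
  x[0,indices] = 1.0
  p = model.predict ( x )[0,0]
  label = 1 if 0.5 <= p else 0

  return label, p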
def timestamp ( ):

#*****************************************************************************80
#
## TIMESTAMP prints the date as a timestamp.
#
#  Licensing:
#
#    This code is distributed under the MIT license.
#
#  Modified:
#
#    06 April 2013
#
#  Author:
#
#    John Burkardt
#
#  Parameters:
#
#    None
#
  import time

  t = time.time ( )
  print ( time.ctime ( t ) )

  return None

if ( __name__ == '__main__' ):
  timestamp ( )
  movie_review ( )
  timestamp ( )