#! /usr/bin/env python3
#
def imdb ( ):

#*****************************************************************************80
#
## imdb uses keras to classify movie reviews.
#
#  Discussion:
#
#    The movie review dataset is loaded with the vocabulary restricted to
#    the word_num most frequent words.  Each review is re-encoded as a 0/1
#    vector of length word_num, and a small dense network (16, 16, 1 units,
#    sigmoid output) is trained with binary crossentropy loss.
#
#    The first val_num training items are held out for validation.  After
#    training, the final validation loss and accuracy are printed, the
#    model is evaluated on the test data, and each reported metric is
#    printed.
#
#    The function takes no input and returns nothing; all results are
#    printed to standard output.
#
#  Licensing:
#
#    This code is distributed under the GNU LGPL license.
#
#  Modified:
#
#    12 November 2019
#
#  Author:
#
#    Original version by Francois Chollet;
#    some modifications by John Burkardt.
#
#  Reference:
#
#    Francois Chollet,
#    Deep Learning with Python,
#    Manning, 2018,
#    ISBN: 9781617294433.
#
  import keras
  import numpy as np
  import platform

  print ( '' )
  print ( 'imdb:' )
  print ( '  Python version: %s' % ( platform.python_version ( ) ) )
  print ( '  keras version: %s' % ( keras.__version__ ) )
  print ( '  Neural network to classify movie reviews.' )
#
#  Import the movie review dataset.
#  The module is aliased so that it does not shadow this function's own name.
#
  from keras.datasets import imdb as imdb_data
#
#  Load the movie review dataset, using the word_num most frequent words.
#
  word_num = 10000
  ( train_data, train_labels ), ( test_data, test_labels ) = \
    imdb_data.load_data ( num_words = word_num )
#
#  Reformat the movie reviews as 0/1 vectors of length word_num.
#  The width is passed explicitly so that the encoding always matches both
#  the vocabulary limit used above and the input shape of the model below.
#
  x_train = vectorize_sequences ( train_data, dimension = word_num )
  x_test = vectorize_sequences ( test_data, dimension = word_num )
#
#  Convert label arrays to the proper numeric type.
#
  y_train = np.asarray ( train_labels ).astype ( 'float32' )
  y_test = np.asarray ( test_labels ).astype ( 'float32' )
#
#  Hold out the first val_num training items for validation;
#  the remaining items are used for training.
#
  val_num = 10000

  x_val = x_train[:val_num]
  partial_x_train = x_train[val_num:]

  y_val = y_train[:val_num]
  partial_y_train = y_train[val_num:]
#
#  Describe the model.
#
  from keras import models
  from keras import layers

  model = models.Sequential ( )

  model.add ( layers.Dense ( 16, activation = 'relu', input_shape = ( word_num, ) ) )
  model.add ( layers.Dense ( 16, activation = 'relu' ) )
  model.add ( layers.Dense ( 1, activation = 'sigmoid' ) )
#
#  Create the model.
#
  model.compile (
    optimizer = 'rmsprop',
    loss = 'binary_crossentropy',
    metrics = ['accuracy'] )
#
#  Use the model for training and validation.
#
  history = model.fit (
    partial_x_train,
    partial_y_train,
    epochs = 20,
    batch_size = 512,
    validation_data = ( x_val, y_val ) )
#
#  Report the validation loss and accuracy from the last epoch.
#
  val_acc = history.history['val_accuracy']
  val_loss = history.history['val_loss']

  print ( '' )
  print ( '  Model loss and accuracy on validation data:' )
  print ( '    Final validation loss', val_loss[-1] )
  print ( '    Final validation accuracy', val_acc[-1] )
#
#  Apply the model to new test data.
#
  print ( '' )
  print ( '  Test the model:' )
  results = model.evaluate ( x_test, y_test )
#
#  Report the test loss and accuracy, pairing each metric name with its value.
#
  print ( '' )
  print ( '  Model loss and accuracy on test data:' )
  for metric_name, metric_value in zip ( model.metrics_names, results ):
    print ( metric_name, metric_value )
#
#  Demonstrate what the model predicts on some of the test data.
#  This is turned off for now; set show_predictions to True to enable it.
#
  show_predictions = False

  if ( show_predictions ):
    x_predict = model.predict ( x_test ).flatten ( )

    print ( '' )
    print ( '  Model predictions on first 20 test movie reviews:' )
    print ( '   # Predict  Actual' )

    for i in range ( 0, 20 ):
      print ( '  %2d  %6.4f    %d' % ( i, x_predict[i], test_labels[i] ) )
#
#  Terminate.
#
  print ( '' )
  print ( 'imdb:' )
  print ( '  Normal end of execution.' )
  return

def vectorize_sequences ( sequences, dimension = 10000 ):
#
#  Encode a collection of integer index sequences as rows of a 0/1 matrix.
#
#  Input:
#
#    sequences: a sized iterable; each item is a sequence of integer
#    indices (for the movie reviews, the numeric codes of the words).
#
#    dimension: the number of columns of the output, default 10,000.
#
#  Output:
#
#    a numpy array with len(sequences) rows and dimension columns.
#    Entry (r,c) is 1.0 if index c occurs one or more times in the r-th
#    sequence, and 0.0 otherwise.
#
  import numpy as np

  row_count = len ( sequences )
  encoded = np.zeros ( ( row_count, dimension ) )
#
#  Mark, in each row, every column named by that row's sequence.
#  Repeated indices simply set the same entry again.
#
  for row, word_indices in enumerate ( sequences ):
    encoded[row, word_indices] = 1.0

  return encoded

#
#  Run the demonstration only when this file is executed as a script.
#
if __name__ == '__main__':
  imdb ( )