#! /usr/bin/env python3
#
def imdb_dataset_load ( ):

#*****************************************************************************80
#
## imdb_dataset_load() ensures there is a local copy of the IMDB dataset.
#
#  Discussion:
#
#    A local copy of the dataset should be located in .keras/datasets,
#    comprising:
#      imdb.npz
#      imdb_word_index.json
#
#    On some HPC systems, computational nodes cannot access the internet,
#    and hence cannot download a remote copy of the IMDB dataset.
#    Running this program ensures that there is a local copy available.
#
#  Licensing:
#
#    This code is distributed under the MIT license.
#
#  Modified:
#
#    11 November 2019
#
#  Author:
#
#    John Burkardt
#
#  Reference:
#
#    Francois Chollet,
#    Deep Learning with Python,
#    Manning, 2018,
#    ISBN: 9781617294433.
#
  import keras
  import numpy as np
  import platform
  import tensorflow

  print ( '' )
  print ( 'imdb_dataset_load():' )
  print ( '  python version:     ' + platform.python_version ( ) )
  print ( '  numpy version:      ' + np.version.version )
  print ( '  tensorflow version: ' + tensorflow.__version__ )
  print ( '  keras version:      ' + keras.__version__ )
  print ( '  This code ensures there is a local copy of the IMDB dataset.' )
#
#  The files may already exist.
#
  import os
  import pathlib

  home = os.getenv ( 'HOME' )
  filename1 = home + '/.keras/datasets/imdb.npz'
  filename2 = home + '/.keras/datasets/imdb_word_index.json'
  filepath1 = pathlib.Path ( filename1 )
  filepath2 = pathlib.Path ( filename2 )

  if ( not filepath1.exists ( ) ):
    print ( '  IMDB data file', filename1, 'was not found locally.' )
  elif ( not filepath2.exists ( ) ):
    print ( '  IMDB data file', filename2, 'was not found locally.' )
  else:
    print ( '' )
    print ( '  Both IMDB data files are already locally accessible.' )
    print ( '' )
    print ( 'imdb_dataset_load():' )
    print ( '  Normal end of execution.' )
    return
#
#  Import the movie review dataset.
#
  from keras.datasets import imdb
#
#  Recent versions of numpy use allow_pickle = False by default, which
#  causes imdb.load_data() to fail.  Temporarily wrap np.load() so that
#  allow_pickle = True is passed, then restore it afterwards.
#
  np_load_old = np.load
  np.load = lambda *a, **k: np_load_old ( *a, allow_pickle = True, **k )
#
#  Load the movie review dataset, which downloads the files if necessary.
#  Only take the first 10,000 most frequent words.
#
  ( train_data, train_labels ), ( test_data, test_labels ) = \
    imdb.load_data ( num_words = 10000 )
#
#  Restore the standard load function.
#
  np.load = np_load_old
#
#  Verify that both files are now locally available.
#
  if ( filepath1.exists ( ) and filepath2.exists ( ) ):
    print ( '' )
    print ( '  A local copy of the IMDB dataset has been set up.' )
  else:
    print ( '' )
    print ( 'imdb_dataset_load():' )
    print ( '  We failed to set up a local copy of the IMDB dataset.' )
    print ( '  Abnormal end of execution.' )
    return
#
#  As a check, list the first data item and its label.
#  A label of 0 means a negative review, and 1 means a positive review.
#
  print ( '' )
  print ( '  First data item and its label:' )
  print ( train_data[0] )
  print ( train_labels[0] )
#
#  Terminate.
#
  print ( '' )
  print ( 'imdb_dataset_load():' )
  print ( '  Normal end of execution.' )
  return

def timestamp ( ):

#*****************************************************************************80
#
## timestamp() prints the date as a timestamp.
#
#  Licensing:
#
#    This code is distributed under the MIT license.
#
#  Modified:
#
#    06 April 2013
#
#  Author:
#
#    John Burkardt
#
  import time

  t = time.time ( )
  print ( time.ctime ( t ) )

  return None

if ( __name__ == '__main__' ):
  timestamp ( )
  imdb_dataset_load ( )
  timestamp ( )
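
#
#  The sketch below is not part of the original program.  It illustrates
#  what the encoded data items printed above actually are: each review is a
#  list of word indices, which can be mapped back to words with the Keras
#  function imdb.get_word_index().  The helper name decode_review() is
#  hypothetical, and the offset of 3 follows the convention described by
#  Chollet: indices 0, 1 and 2 are reserved for "padding", "start of
#  sequence" and "unknown".
#
def decode_review ( encoded_review ):

#*****************************************************************************80
#
## decode_review() converts a list of IMDB word indices back into text.
#
#  Discussion:
#
#    This is an illustrative sketch.  It assumes the IMDB word index file
#    (imdb_word_index.json) is locally available, as arranged by
#    imdb_dataset_load().
#
  from keras.datasets import imdb
#
#  Build the reverse mapping from integer index to word.
#
  word_index = imdb.get_word_index ( )
  reverse_word_index = dict (
    [ ( value, key ) for ( key, value ) in word_index.items ( ) ] )
#
#  Indices 0, 1 and 2 are reserved, so shift each stored index down by 3;
#  indices with no corresponding word are rendered as '?'.
#
  words = [ reverse_word_index.get ( i - 3, '?' ) for i in encoded_review ]

  return ' '.join ( words )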