#! /usr/bin/env python3
#
def imdb_dataset_load ( ):

#*****************************************************************************80
#
## imdb_dataset_load() ensures there is a local copy of the IMDB dataset.
#
#  Discussion:
#
#    A local copy of the dataset should be located in .keras/datasets,
#    comprising:
#      imdb.npz
#      imdb_word_index.json
#
#    On some HPC systems, computational nodes cannot access the internet,
#    and hence cannot download a remote copy of the IMDB dataset.
#    Running this program ensures that there is a local copy available.
#
#  Licensing:
#
#    This code is distributed under the MIT license.
#
#  Modified:
#
#    11 November 2019
#
#  Author:
#
#    John Burkardt
#
#  Reference:
#
#    Francois Chollet,
#    Deep Learning with Python,
#    Manning, 2018,
#    ISBN: 9781617294433.
#
  import keras
  import numpy as np
  import platform
  import tensorflow

  print ( '' )
  print ( 'imdb_dataset_load():' )
  print ( '  python version:     ' + platform.python_version ( ) )
  print ( '  numpy version:      ' + np.version.version )
  print ( '  tensorflow version: ' + tensorflow.__version__ )
  print ( '  keras version:      ' + keras.__version__ )
  print ( '  This code ensures there is a local copy of the IMDB dataset.' )
#
#  The files may already exist.
#
  import os
  import pathlib

  home = os.getenv ( 'HOME' )
  filename1 = home + '/.keras/datasets/imdb.npz'
  filename2 = home + '/.keras/datasets/imdb_word_index.json'
  filepath1 = pathlib.Path ( filename1 )
  filepath2 = pathlib.Path ( filename2 )

  if ( not filepath1.exists ( ) ):
    print ( '  IMDB data file', filename1, 'was not found locally.' )
  elif ( not filepath2.exists ( ) ):
    print ( '  IMDB data file', filename2, 'was not found locally.' )
  else:
    print ( '' )
    print ( '  Both IMDB data files are already locally accessible.' )
    print ( '' )
    print ( 'imdb_dataset_load():' )
    print ( '  Normal end of execution.' )
    return
#
#  Import the movie review dataset.
#
  from keras.datasets import imdb
#
#  Recent versions of numpy use allow_pickle = False by default, which
#  causes imdb.load_data() to fail.  Temporarily wrap np.load() so that
#  allow_pickle = True is passed, then restore it afterwards.
#
  np_load_old = np.load
  np.load = lambda *a, **k: np_load_old ( *a, allow_pickle = True, **k )
#
#  Load the movie review dataset, which downloads the files if necessary.
#  Only take the first 10,000 most frequent words.
#
  ( train_data, train_labels ), ( test_data, test_labels ) = \
    imdb.load_data ( num_words = 10000 )
#
#  Restore the standard load function.
#
  np.load = np_load_old
#
#  Verify that both files are now locally available.
#
  if ( filepath1.exists ( ) and filepath2.exists ( ) ):
    print ( '' )
    print ( '  A local copy of the IMDB dataset has been set up.' )
  else:
    print ( '' )
    print ( 'imdb_dataset_load():' )
    print ( '  We failed to set up a local copy of the IMDB dataset.' )
    print ( '  Abnormal end of execution.' )
    return
#
#  As a check, list the first data item and its label.
#  A label of 0 means a negative review, and 1 means a positive review.
#
  print ( '' )
  print ( '  First data item and its label:' )
  print ( train_data[0] )
  print ( train_labels[0] )
#
#  Terminate.
#
  print ( '' )
  print ( 'imdb_dataset_load():' )
  print ( '  Normal end of execution.' )
  return

def timestamp ( ):

#*****************************************************************************80
#
## timestamp() prints the date as a timestamp.
#
#  Licensing:
#
#    This code is distributed under the MIT license.
#
#  Modified:
#
#    06 April 2013
#
#  Author:
#
#    John Burkardt
#
  import time

  t = time.time ( )
  print ( time.ctime ( t ) )

  return None

if ( __name__ == '__main__' ):
  timestamp ( )
  imdb_dataset_load ( )
  timestamp ( )
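
#
#  The sketch below is not part of the original program.  It illustrates
#  what the encoded data items printed above actually are: each review is a
#  list of word indices, which can be mapped back to words with the Keras
#  function imdb.get_word_index().  The helper name decode_review() is
#  hypothetical, and the offset of 3 follows the convention described by
#  Chollet: indices 0, 1 and 2 are reserved for "padding", "start of
#  sequence" and "unknown".
#
def decode_review ( encoded_review ):

#*****************************************************************************80
#
## decode_review() converts a list of IMDB word indices back into text.
#
#  Discussion:
#
#    This is an illustrative sketch.  It assumes the IMDB word index file
#    (imdb_word_index.json) is locally available, as arranged by
#    imdb_dataset_load().
#
  from keras.datasets import imdb
#
#  Build the reverse mapping from integer index to word.
#
  word_index = imdb.get_word_index ( )
  reverse_word_index = dict (
    [ ( value, key ) for ( key, value ) in word_index.items ( ) ] )
#
#  Indices 0, 1 and 2 are reserved, so shift each stored index down by 3;
#  indices with no corresponding word are rendered as '?'.
#
  words = [ reverse_word_index.get ( i - 3, '?' ) for i in encoded_review ]

  return ' '.join ( words )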