#! /usr/bin/env python3 # def dictionary_code_test ( ): #*****************************************************************************80 # ## dictionary_code_test() tests dictionary_code(). # # Licensing: # # This code is distributed under the MIT license. # # Modified: # # 15 March 2025 # # Author: # # John Burkardt # import numpy as np import platform print ( '' ) print ( 'dictionary_code_test():' ) print ( ' python version: ' + platform.python_version ( ) ) print ( ' numpy version: ' + np.version.version ) print ( ' Test dictionary_code().' ) dictionary_encode ( ) dictionary_decode ( ) # # Terminate. # print ( '' ) print ( 'dictionary_code_test():' ) print ( ' Normal end of execution.' ) return def dictionary_decode ( ): #*****************************************************************************80 # ## dictionary_decode() applies dictionary decoding to a text file. # # Licensing: # # This code is distributed under the MIT license. # # Modified: # # 15 March 2025 # # Author: # # John Burkardt # import numpy as np print ( '' ) print ( 'dictionary_decode():' ) print ( ' Apply dictionary decoding to a file.' ) print ( '' ) # # Read the dictionary file. # filename = 'gettysburg_address_dictionary.txt' file = open ( filename, 'r' ) word_unique_list = [ line.rstrip() for line in file ] file.close ( ) # # Read the encoded text file. # filename = 'gettysburg_address_encoded.txt' file = open ( filename, 'r' ) indices = np.loadtxt ( file, dtype = int ) file.close ( ) # # Replace each index in the encoded text file by the corresponding # word from the dictionary. # word_num = len ( indices ) for index in indices: print ( word_unique_list[index] ) # # Terminate. # print ( '' ) print ( 'dictionary_decode():' ) print ( ' Normal end of execution.' ) return def dictionary_encode ( ): #*****************************************************************************80 # ## dictionary_encode() applies dictionary encoding to a text file. # # Licensing: # # This code is distributed under the MIT license. # # Modified: # # 15 March 2025 # # Author: # # John Burkardt # print ( '' ) print ( 'dictionary_encode():' ) print ( ' Apply dictionary encoding to a file.' ) # # Read a text into a cell array. # filename = 'gettysburg_address2.txt' file = open ( filename, 'r' ) data_string = file.read ( ) word_list = data_string.split() word_num = len ( word_list ) file.close ( ) # # Count characters and words in input file. # char_num = 0 for word in word_list: char_num = char_num + len ( word ) print ( ' Text contains ', char_num, ' characters.' ) print ( ' Text contains ', word_num, ' words.' ) # # Collect the unique words in the text. # word_unique_list = list ( set ( word_list ) ) word_unique_num = len ( word_unique_list ) print ( ' Text contains ', word_unique_num, ' unique words.' ) # # Sort the unique words. # word_unique_list = sorted ( word_unique_list ) # # Print the unique words. # i = 0 for word in word_unique_list: print ( ' %4d %s' % ( i, word ) ) i = i + 1 # # Write the unique words to a "dictionary" file. # filename = 'gettysburg_address_dictionary.txt' output = open ( filename, 'wt' ) for word in word_unique_list: print ( word, file = output ) output.close ( ) # # Replace each word in the original text by the index of # that word in the dictionary. # filename = 'gettysburg_address_encoded.txt' output = open ( filename, 'wt' ) for word in word_list: index = word_unique_list.index ( word ) print ( index, file = output ) output.close ( ) # # Terminate. # print ( '' ) print ( 'dictionary_encode():' ) print ( ' Normal end of execution.' ) return def timestamp ( ): #*****************************************************************************80 # ## timestamp() prints the date as a timestamp. # # Licensing: # # This code is distributed under the MIT license. # # Modified: # # 21 August 2019 # # Author: # # John Burkardt # import time t = time.time ( ) print ( time.ctime ( t ) ) return if ( __name__ == '__main__' ): timestamp ( ) dictionary_code_test ( ) timestamp ( )