/************************************************************************* * * * N A S P A R A L L E L B E N C H M A R K S 3.3 * * * * S E R I A L V E R S I O N * * * * I S * * * ************************************************************************* * * * This benchmark is a serial version of the NPB IS code. * * Refer to NAS Technical Reports 95-020 for details. * * * * Permission to use, copy, distribute and modify this software * * for any purpose with or without fee is hereby granted. We * * request, however, that all derived work reference the NAS * * Parallel Benchmarks 3.3. This software is provided "as is" * * without express or implied warranty. * * * * Information on NPB 3.3, including the technical report, the * * original specifications, source code, results and information * * on how to submit new results, is available at: * * * * http://www.nas.nasa.gov/Software/NPB/ * * * * Send comments or suggestions to npb@nas.nasa.gov * * * * NAS Parallel Benchmarks Group * * NASA Ames Research Center * * Mail Stop: T27A-1 * * Moffett Field, CA 94035-1000 * * * * E-mail: npb@nas.nasa.gov * * Fax: (650) 604-3957 * * * ************************************************************************* * * * Author: M. Yarrow * * H. Jin * * * *************************************************************************/ # define COMPILETIME "02 Apr 2009" # define NPBVERSION "3.3" # define CC "/usr/bin/gcc" # define CFLAGS "-O" # define CLINK "$(CC)" # define CLINKFLAGS "-O" # define C_LIB "(none)" # define C_INC "(none)" # include # include # include # include /* C/Fortran interface is different on different machines. * You may need to tweak this. */ #if defined(IBM) #define wtime wtime #elif defined(CRAY) #define wtime WTIME #else #define wtime wtime_ #endif /*****************************************************************/ /* For serial IS, buckets are not really req'd to solve NPB1 IS */ /* spec, but their use on some machines improves performance, on */ /* other machines the use of buckets compromises performance, */ /* probably because it is extra computation which is not req'd. */ /* (Note: Mechanism not understood, probably cache related) */ /* Example: SP2-66MhzWN: 50% speedup with buckets */ /* Example: SGI Indy5000: 50% slowdown with buckets */ /* Example: SGI O2000: 400% slowdown with buckets (Wow!) */ /*****************************************************************/ /* To disable the use of buckets, comment out the following line */ #define USE_BUCKETS /******************/ /* default values */ /******************/ #ifndef CLASS #define CLASS 'S' #endif /*************/ /* CLASS S */ /*************/ #if CLASS == 'S' #define TOTAL_KEYS_LOG_2 16 #define MAX_KEY_LOG_2 11 #define NUM_BUCKETS_LOG_2 9 #endif /*************/ /* CLASS W */ /*************/ #if CLASS == 'W' #define TOTAL_KEYS_LOG_2 20 #define MAX_KEY_LOG_2 16 #define NUM_BUCKETS_LOG_2 10 #endif /*************/ /* CLASS A */ /*************/ #if CLASS == 'A' #define TOTAL_KEYS_LOG_2 23 #define MAX_KEY_LOG_2 19 #define NUM_BUCKETS_LOG_2 10 #endif /*************/ /* CLASS B */ /*************/ #if CLASS == 'B' #define TOTAL_KEYS_LOG_2 25 #define MAX_KEY_LOG_2 21 #define NUM_BUCKETS_LOG_2 10 #endif /*************/ /* CLASS C */ /*************/ #if CLASS == 'C' #define TOTAL_KEYS_LOG_2 27 #define MAX_KEY_LOG_2 23 #define NUM_BUCKETS_LOG_2 10 #endif /*************/ /* CLASS D */ /*************/ #if CLASS == 'D' #define TOTAL_KEYS_LOG_2 31 #define MAX_KEY_LOG_2 27 #define NUM_BUCKETS_LOG_2 10 #endif #if CLASS == 'D' #define TOTAL_KEYS (1L << TOTAL_KEYS_LOG_2) #else #define TOTAL_KEYS (1 << TOTAL_KEYS_LOG_2) #endif #define MAX_KEY (1 << MAX_KEY_LOG_2) #define NUM_BUCKETS (1 << NUM_BUCKETS_LOG_2) #define NUM_KEYS TOTAL_KEYS #define SIZE_OF_BUFFERS NUM_KEYS #define MAX_ITERATIONS 10 #define TEST_ARRAY_SIZE 5 /*************************************/ /* Typedef: if necessary, change the */ /* size of int here by changing the */ /* int type to, say, long */ /*************************************/ #if CLASS == 'D' typedef long INT_TYPE; #else typedef int INT_TYPE; #endif /********************/ /* Some global info */ /********************/ INT_TYPE *key_buff_ptr_global; /* used by full_verify to get */ /* copies of rank info */ int passed_verification; /************************************/ /* These are the three main arrays. */ /* See SIZE_OF_BUFFERS def above */ /************************************/ INT_TYPE key_array[SIZE_OF_BUFFERS], key_buff1[MAX_KEY], key_buff2[SIZE_OF_BUFFERS], partial_verify_vals[TEST_ARRAY_SIZE]; #ifdef USE_BUCKETS INT_TYPE bucket_size[NUM_BUCKETS], bucket_ptrs[NUM_BUCKETS]; #endif /**********************/ /* Partial verif info */ /**********************/ INT_TYPE test_index_array[TEST_ARRAY_SIZE], test_rank_array[TEST_ARRAY_SIZE], S_test_index_array[TEST_ARRAY_SIZE] = {48427,17148,23627,62548,4431}, S_test_rank_array[TEST_ARRAY_SIZE] = {0,18,346,64917,65463}, W_test_index_array[TEST_ARRAY_SIZE] = {357773,934767,875723,898999,404505}, W_test_rank_array[TEST_ARRAY_SIZE] = {1249,11698,1039987,1043896,1048018}, A_test_index_array[TEST_ARRAY_SIZE] = {2112377,662041,5336171,3642833,4250760}, A_test_rank_array[TEST_ARRAY_SIZE] = {104,17523,123928,8288932,8388264}, B_test_index_array[TEST_ARRAY_SIZE] = {41869,812306,5102857,18232239,26860214}, B_test_rank_array[TEST_ARRAY_SIZE] = {33422937,10244,59149,33135281,99}, C_test_index_array[TEST_ARRAY_SIZE] = {44172927,72999161,74326391,129606274,21736814}, C_test_rank_array[TEST_ARRAY_SIZE] = {61147,882988,266290,133997595,133525895}, D_test_index_array[TEST_ARRAY_SIZE] = {1317351170,995930646,1157283250,1503301535,1453734525}, D_test_rank_array[TEST_ARRAY_SIZE] = {1,36538729,1978098519,2145192618,2147425337}; /***********************/ /* function prototypes */ /***********************/ void c_print_results( char *name, char class, int n1, int n2, int n3, int niter, double t, double mops, char *optype, int passed_verification, char *npbversion, char *compiletime, char *cc, char *clink, char *c_lib, char *c_inc, char *cflags, char *clinkflags ); void full_verify ( ); int main( int argc, char **argv ); double randlc( double *X, double *A ); void timer_clear( int n ); void timer_start( int n ); void timer_stop( int n ); double timer_read( int n ); /* * FUNCTION RANDLC (X, A) * * This routine returns a uniform pseudorandom double precision number in the * range (0, 1) by using the linear congruential generator * * x_{k+1} = a x_k (mod 2^46) * * where 0 < x_k < 2^46 and 0 < a < 2^46. This scheme generates 2^44 numbers * before repeating. The argument A is the same as 'a' in the above formula, * and X is the same as x_0. A and X must be odd double precision integers * in the range (1, 2^46). The returned value RANDLC is normalized to be * between 0 and 1, i.e. RANDLC = 2^(-46) * x_1. X is updated to contain * the new seed x_1, so that subsequent calls to RANDLC using the same * arguments will generate a continuous sequence. * * This routine should produce the same results on any computer with at least * 48 mantissa bits in double precision floating point data. On Cray systems, * double precision should be disabled. * * David H. Bailey October 26, 1990 * * IMPLICIT DOUBLE PRECISION (A-H, O-Z) * SAVE KS, R23, R46, T23, T46 * DATA KS/0/ * * If this is the first call to RANDLC, compute R23 = 2 ^ -23, R46 = 2 ^ -46, * T23 = 2 ^ 23, and T46 = 2 ^ 46. These are computed in loops, rather than * by merely using the ** operator, in order to insure that the results are * exact on all systems. This code assumes that 0.5D0 is represented exactly. */ /*****************************************************************/ /************* R A N D L C ************/ /************* ************/ /************* portable random number generator ************/ /*****************************************************************/ double randlc( double *X, double *A ) { static int KS=0; static double R23, R46, T23, T46; double T1, T2, T3, T4; double A1; double A2; double X1; double X2; double Z; int i, j; if (KS == 0) { R23 = 1.0; R46 = 1.0; T23 = 1.0; T46 = 1.0; for (i=1; i<=23; i++) { R23 = 0.50 * R23; T23 = 2.0 * T23; } for (i=1; i<=46; i++) { R46 = 0.50 * R46; T46 = 2.0 * T46; } KS = 1; } /* Break A into two parts such that A = 2^23 * A1 + A2 and set X = N. */ T1 = R23 * *A; j = T1; A1 = j; A2 = *A - T23 * A1; /* Break X into two parts such that X = 2^23 * X1 + X2, compute Z = A1 * X2 + A2 * X1 (mod 2^23), and then X = 2^23 * Z + A2 * X2 (mod 2^46). */ T1 = R23 * *X; j = T1; X1 = j; X2 = *X - T23 * X1; T1 = A1 * X2 + A2 * X1; j = R23 * T1; T2 = j; Z = T1 - T23 * T2; T3 = T23 * Z + A2 * X2; j = R46 * T3; T4 = j; *X = T3 - T46 * T4; return(R46 * *X); } /*****************************************************************/ /************* C R E A T E _ S E Q ************/ /*****************************************************************/ void create_seq( double seed, double a ) { double x; int i, k; k = MAX_KEY/4; for (i=0; i key_array[i] ) j++; if( j != 0 ) { printf( "Full_verify: number of keys out of sort: %ld\n", (long)j ); } else passed_verification++; } /*****************************************************************/ /************* R A N K ****************/ /*****************************************************************/ void rank( int iteration ) { INT_TYPE i, k; INT_TYPE *key_buff_ptr, *key_buff_ptr2; #ifdef USE_BUCKETS int shift = MAX_KEY_LOG_2 - NUM_BUCKETS_LOG_2; INT_TYPE key; #endif key_array[iteration] = iteration; key_array[iteration+MAX_ITERATIONS] = MAX_KEY - iteration; /* Determine where the partial verify test keys are, load into */ /* top of array bucket_size */ for( i=0; i> shift]++; /* Accumulative bucket sizes are the bucket pointers */ bucket_ptrs[0] = 0; for( i=1; i< NUM_BUCKETS; i++ ) bucket_ptrs[i] = bucket_ptrs[i-1] + bucket_size[i-1]; /* Sort into appropriate bucket */ for( i=0; i> shift]++] = key; } key_buff_ptr2 = key_buff2; #else key_buff_ptr2 = key_array; #endif /* Clear the work array */ for( i=0; i