/* A C-program for MT19937, with initialization improved 2002/2/10. Coded by Takuji Nishimura and Makoto Matsumoto. This is a faster version by taking Shawn Cokus's optimization, Matthe Bellew's simplification, Isaku Wada's real version. David Bateman added normal and exponential distributions following Marsaglia and Tang's Ziggurat algorithm. Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura, Copyright (C) 2004, David Bateman All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. The names of its contributors may not be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. Any feedback is very welcome. http://www.math.keio.ac.jp/matumoto/emt.html email: matumoto@math.keio.ac.jp 2007-09-22 Ivan Raikov * Created single-precision versions of the array generator procedures. * Removed static state and table variables and created reentrant variants of each routine. 2007-09-20 Ivan Raikov Adapted for use in Chicken Scheme. * 2006-04-01 David Bateman * * convert for use in octave, declaring static functions only used * here and adding oct_ to functions visible externally * * inverse sense of ALLBITS * 2004-01-19 Paul Kienzle * * comment out main * add init_by_entropy, get_state, set_state * * converted to allow compiling by C++ compiler * * 2004-01-25 David Bateman * * Add Marsaglia and Tsang Ziggurat code * * 2004-07-13 Paul Kienzle * * make into an independent library with some docs. * * introduce new main and test code. * * 2004-07-28 Paul Kienzle & David Bateman * * add -DALLBITS flag for 32 vs. 53 bits of randomness in mantissa * * make the naming scheme more uniform * * add -DHAVE_X86 for faster support of 53 bit mantissa on x86 arch. * * 2005-02-23 Paul Kienzle * * fix -DHAVE_X86_32 flag and add -DUSE_X86_32=0|1 for explicit control */ /* === Build instructions === Compile with -DHAVE_GETTIMEOFDAY if the gettimeofday function is available. This is not necessary if your architecture has /dev/urandom defined. Compile with -DALLBITS to disable 53-bit random numbers. This is about 50% slower than using 32-bit random numbers. Uses implicit -Di386 or explicit -DHAVE_X86_32 to determine if CPU=x86. You can force X86 behaviour with -DUSE_X86_32=1, or suppress it with -DUSE_X86_32=0. You should also consider -march=i686 or similar for extra performance. Check whether -DUSE_X86_32=0 is faster on 64-bit x86 architectures. If you want to replace the Mersenne Twister with another generator then redefine randi32 appropriately. === Usage instructions === Before using any of the generators, initialize the state with one of randmtzig_init_by_int, randmtzig_init_by_array or randmtzig_init_by_entropy. All generators share the same state vector. === Mersenne Twister === void randmtzig_init_by_int(uint32_t s) 32-bit initial state void randmtzig_init_by_array(uint32_t k[],int m) m*32-bit initial state void randmtzig_init_by_entropy(void) random initial state void randmtzig_get_state(uint32_t save[MT_N+1]) saves state in array void randmtzig_set_state(uint32_t save[MT_N+1]) restores state from array static uint32_t randmt(void) returns 32-bit unsigned int === inline generators === static uint32_t randi32(void) returns 32-bit unsigned int static uint64_t randi53(void) returns 53-bit unsigned int static uint64_t randi54(void) returns 54-bit unsigned int static uint64_t randi64(void) returns 64-bit unsigned int static double randu32(void) returns 32-bit uniform in (0,1) static double randu53(void) returns 53-bit uniform in (0,1) double randmtzig_randu(void) returns M-bit uniform in (0,1) double randmtzig_randn(void) returns M-bit standard normal double randmtzig_rande(void) returns N-bit standard exponential === Array generators === void randmtzig_fill_randi32(randmtzig_idx_type, uint32_t []) void randmtzig_fill_randi64(randmtzig_idx_type, uint64_t []) void randmtzig_fill_randu(randmtzig_idx_type, double []) void randmtzig_fill_randn(randmtzig_idx_type, double []) void randmtzig_fill_rande(randmtzig_idx_type, double []) */ #define _ISOC99_SOURCE #define _XOPEN_SOURCE #include #include #include #include typedef int randmtzig_idx_type; typedef signed char randmtzig_int8_t; typedef unsigned char randmtzig_uint8_t; typedef short randmtzig_int16_t; typedef unsigned short randmtzig_uint16_t; typedef int randmtzig_int32_t; typedef unsigned int randmtzig_uint32_t; typedef long long randmtzig_int64_t; typedef unsigned long long randmtzig_uint64_t; #define PI 3.14159265358979323846 #define MT_N 624 #define ZIGGURAT_TABLE_SIZE 256 /* FIXME may want to suppress X86 if sizeof(long)>4 */ #if !defined(USE_X86_32) # if defined(i386) || defined(HAVE_X86_32) # define USE_X86_32 1 # else # define USE_X86_32 0 # endif #endif /* ===== Mersenne Twister 32-bit generator ===== */ #define MT_M 397 #define MATRIX_A 0x9908b0dfUL /* constant vector a */ #define UMASK 0x80000000UL /* most significant w-r bits */ #define LMASK 0x7fffffffUL /* least significant r bits */ #define MIXBITS(u,v) ( ((u) & UMASK) | ((v) & LMASK) ) #define TWIST(u,v) ((MIXBITS(u,v) >> 1) ^ ((v)&1UL ? MATRIX_A : 0UL)) //static uint32_t *next; /* static uint32_t state[MT_N]; /\* the array for the state vector *\/ */ /* unsigned int next; */ /* static int left = 1; */ /* static int initf = 0; */ /* static int initt = 1; */ #define STATE_NEXT 0 #define STATE_LEFT 1 #define STATE_INITF 2 #define STATE_INITT 3 #define STATE_SEED 4 /* initializes state[MT_N] with a seed */ void randmtzig_init_by_int (randmtzig_uint32_t s, randmtzig_uint32_t *state) { int j; state[STATE_SEED+0] = s & 0xffffffffUL; for (j = 1; j < MT_N; j++) { state[STATE_SEED+j] = (1812433253UL * (state[STATE_SEED+j-1] ^ (state[STATE_SEED+j-1] >> 30)) + j); /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */ /* In the previous versions, MSBs of the seed affect */ /* only MSBs of the array state[]. */ /* 2002/01/09 modified by Makoto Matsumoto */ state[STATE_SEED+j] &= 0xffffffffUL; /* for >32 bit machines */ } state[STATE_LEFT] = 1; state[STATE_INITF] = 1; state[STATE_INITT] = 1; } /* initialize by an array with array-length */ /* init_key is the array for initializing keys */ /* key_length is its length */ void randmtzig_init_by_array (randmtzig_uint32_t init_key[], int key_length, randmtzig_uint32_t *state) { int i, j, k; randmtzig_init_by_int (19650218UL,state); i = 1; j = 0; k = (MT_N > key_length ? MT_N : key_length); for (; k; k--) { state[STATE_SEED+i] = (state[STATE_SEED+i] ^ ((state[STATE_SEED+i-1] ^ (state[STATE_SEED+i-1] >> 30)) * 1664525UL)) + init_key[j] + j; /* non linear */ state[STATE_SEED+i] &= 0xffffffffUL; /* for WORDSIZE > 32 machines */ i++; j++; if (i >= MT_N) { state[STATE_SEED+0] = state[STATE_SEED+MT_N-1]; i = 1; } if (j >= key_length) j = 0; } for (k = MT_N - 1; k; k--) { state[STATE_SEED+i] = (state[STATE_SEED+i] ^ ((state[STATE_SEED+i-1] ^ (state[STATE_SEED+i-1] >> 30)) * 1566083941UL)) - i; /* non linear */ state[STATE_SEED+i] &= 0xffffffffUL; /* for WORDSIZE > 32 machines */ i++; if (i >= MT_N) { state[STATE_SEED+0] = state[STATE_SEED+MT_N-1]; i = 1; } } state[STATE_SEED+0] = 0x80000000UL; /* MSB is 1; assuring non-zero initial array */ state[STATE_LEFT] = 1; state[STATE_INITF] = 1; } void randmtzig_init_by_entropy (randmtzig_uint32_t *state) { randmtzig_uint32_t entropy[MT_N]; int n = 0; /* Look for entropy in /dev/urandom */ FILE* urandom =fopen("/dev/urandom", "rb"); if (urandom) { while (n < MT_N) { unsigned char word[4]; if (fread(word, 4, 1, urandom) != 1) break; entropy[n++] = word[0]+(word[1]<<8)+(word[2]<<16)+(word[3]<<24); } fclose(urandom); } /* If there isn't enough entropy, gather some from various sources */ if (n < MT_N) entropy[n++] = time(NULL); /* Current time in seconds */ if (n < MT_N) entropy[n++] = clock(); /* CPU time used (usec) */ #ifdef HAVE_GETTIMEOFDAY if (n < MT_N) { struct timeval tv; if (gettimeofday(&tv, NULL) != -1) entropy[n++] = tv.tv_usec; /* Fractional part of current time */ } #endif /* Send all the entropy into the initial state vector */ randmtzig_init_by_array(entropy,n,state); } static void next_state (randmtzig_uint32_t *state) { randmtzig_uint32_t *p = state+STATE_SEED; int j; /* if init_by_int() has not been called, */ /* a default initial seed is used */ /* if (initf==0) init_by_int(5489UL); */ /* Or better yet, a random seed! */ if (state[STATE_INITF] != 1) randmtzig_init_by_entropy(state); state[STATE_LEFT] = MT_N; state[STATE_NEXT] = 0; for (j = MT_N - MT_M + 1; --j; p++) *p = p[MT_M] ^ TWIST(p[0], p[1]); for (j = MT_M; --j; p++) *p = p[MT_M-MT_N] ^ TWIST(p[0], p[1]); *p = p[MT_M-MT_N] ^ TWIST(p[0], state[STATE_SEED+0]); } /* generates a random number on [0,0xffffffff]-interval */ static randmtzig_uint32_t randmt (randmtzig_uint32_t *state) { register randmtzig_uint32_t y; (state[STATE_LEFT])--; if (state[STATE_LEFT] == 0) next_state(state); y = state[STATE_SEED+(state[STATE_NEXT])]; (state[STATE_NEXT])++; /* Tempering */ y ^= (y >> 11); y ^= (y << 7) & 0x9d2c5680UL; y ^= (y << 15) & 0xefc60000UL; return (y ^ (y >> 18)); } /* ===== Uniform generators ===== */ /* Select which 32 bit generator to use */ #define randi32 randmt static randmtzig_uint64_t randi53 (randmtzig_uint32_t *state) { const randmtzig_uint32_t lo = randi32(state); const randmtzig_uint32_t hi = randi32(state)&0x1FFFFF; #if HAVE_X86_32 randmtzig_uint64_t u; randmtzig_uint32_t *p = (randmtzig_uint32_t *)&u; p[0] = lo; p[1] = hi; return u; #else return (((randmtzig_uint64_t)hi<<32)|lo); #endif } static randmtzig_uint64_t randi54 (randmtzig_uint32_t *state) { const randmtzig_uint32_t lo = randi32(state); const randmtzig_uint32_t hi = randi32(state)&0x3FFFFF; #if HAVE_X86_32 randmtzig_uint64_t u; randmtzig_uint32_t *p = (randmtzig_uint32_t *)&u; p[0] = lo; p[1] = hi; return u; #else return (((randmtzig_uint64_t)hi<<32)|lo); #endif } static randmtzig_uint64_t randi64 (randmtzig_uint32_t *state) { const randmtzig_uint32_t lo = randi32(state); const randmtzig_uint32_t hi = randi32(state); #if HAVE_X86_32 randmtzig_uint64_t u; randmtzig_uint32_t *p = (randmtzig_uint32_t *)&u; p[0] = lo; p[1] = hi; return u; #else return (((randmtzig_uint64_t)hi<<32)|lo); #endif } /* generates a random number on (0,1)-real-interval */ static double randu32 (randmtzig_uint32_t *state) { return ((double)randi32(state) + 0.5) * (1.0/4294967296.0); /* divided by 2^32 */ } /* generates a random number on (0,1) with 53-bit resolution */ static double randu53 (randmtzig_uint32_t *state) { const randmtzig_uint32_t a=randi32(state)>>5; const randmtzig_uint32_t b=randi32(state)>>6; return(a*67108864.0+b+0.4) * (1.0/9007199254740992.0); } /* Determine mantissa for uniform doubles */ double randmtzig_randu (randmtzig_uint32_t *state) { return randu53(state); } randmtzig_uint32_t randmtzig_randi32 (randmtzig_uint32_t *state) { return randi32(state); } /* ===== Ziggurat normal and exponential generators ===== */ # define ZIGINT randmtzig_uint64_t # define EMANTISSA 9007199254740992.0 /* 53 bit mantissa */ # define ERANDI randi53(state) /* 53 bits for mantissa */ # define NMANTISSA EMANTISSA # define NRANDI randi54(state) /* 53 bits for mantissa + 1 bit sign */ # define RANDU randu53(state) #define ZIGGURAT_TABLE_SIZE 256 #define ZIGGURAT_NOR_R 3.6541528853610088 #define ZIGGURAT_NOR_INV_R 0.27366123732975828 #define NOR_SECTION_AREA 0.00492867323399 #define ZIGGURAT_EXP_R 7.69711747013104972 #define ZIGGURAT_EXP_INV_R 0.129918765548341586 #define EXP_SECTION_AREA 0.0039496598225815571993 /* This code is based on the paper Marsaglia and Tsang, "The ziggurat method for generating random variables", Journ. Statistical Software. Code was presented in this paper for a Ziggurat of 127 levels and using a 32 bit integer random number generator. This version of the code, uses the Mersenne Twister as the integer generator and uses 256 levels in the Ziggurat. This has several advantages. 1) As Marsaglia and Tsang themselves states, the more levels the few times the expensive tail algorithm must be called 2) The cycle time of the generator is determined by the integer generator, thus the use of a Mersenne Twister for the core random generator makes this cycle extremely long. 3) The license on the original code was unclear, thus rewriting the code from the article means we are free of copyright issues. 4) Compile flag for full 53-bit random mantissa. It should be stated that the authors made my life easier, by the fact that the algorithm developed in the text of the article is for a 256 level ziggurat, even if the code itself isn't... One modification to the algorithm developed in the article, is that it is assumed that 0 <= x < Inf, and "unsigned long"s are used, thus resulting in terms like 2^32 in the code. As the normal distribution is defined between -Inf < x < Inf, we effectively only have 31 bit integers plus a sign. Thus in Marsaglia and Tsang, terms like 2^32 become 2^31. We use NMANTISSA for this term. The exponential distribution is one sided so we use the full 32 bits. We use EMANTISSA for this term. It appears that I'm slightly slower than the code in the article, this is partially due to a better generator of random integers than they use. But might also be that the case of rapid return was optimized by inlining the relevant code with a #define. As the basic Mersenne Twister is only 25% faster than this code I suspect that the main reason is just the use of the Mersenne Twister and not the inlining, so I'm not going to try and optimize further. */ /* static ZIGINT ki[ZIGGURAT_TABLE_SIZE]; */ /* static ZIGINT ke[ZIGGURAT_TABLE_SIZE]; */ /* static double wi[ZIGGURAT_TABLE_SIZE], fi[ZIGGURAT_TABLE_SIZE]; */ /* static double we[ZIGGURAT_TABLE_SIZE], fe[ZIGGURAT_TABLE_SIZE]; */ static void create_ziggurat_tables (randmtzig_uint32_t *state, ZIGINT *ki, ZIGINT *ke, double *wi, double *fi, double *we, double *fe) { int i; double x, x1; /* Ziggurat tables for the normal distribution */ x1 = ZIGGURAT_NOR_R; wi[255] = x1 / NMANTISSA; fi[255] = exp (-0.5 * x1 * x1); /* Index zero is special for tail strip, where Marsaglia and Tsang * defines this as * k_0 = 2^31 * r * f(r) / v, w_0 = 0.5^31 * v / f(r), f_0 = 1, * where v is the area of each strip of the ziggurat. */ ki[0] = (ZIGINT) (x1 * fi[255] / NOR_SECTION_AREA * NMANTISSA); wi[0] = NOR_SECTION_AREA / fi[255] / NMANTISSA; fi[0] = 1.; for (i = 254; i > 0; i--) { /* New x is given by x = f^{-1}(v/x_{i+1} + f(x_{i+1})), thus * need inverse operator of y = exp(-0.5*x*x) -> x = sqrt(-2*ln(y)) */ x = sqrt(-2. * log(NOR_SECTION_AREA / x1 + fi[i+1])); ki[i+1] = (ZIGINT)(x / x1 * NMANTISSA); wi[i] = x / NMANTISSA; fi[i] = exp (-0.5 * x * x); x1 = x; } ki[1] = 0; /* Zigurrat tables for the exponential distribution */ x1 = ZIGGURAT_EXP_R; we[255] = x1 / EMANTISSA; fe[255] = exp (-x1); /* Index zero is special for tail strip, where Marsaglia and Tsang * defines this as * k_0 = 2^32 * r * f(r) / v, w_0 = 0.5^32 * v / f(r), f_0 = 1, * where v is the area of each strip of the ziggurat. */ ke[0] = (ZIGINT) (x1 * fe[255] / EXP_SECTION_AREA * EMANTISSA); we[0] = EXP_SECTION_AREA / fe[255] / EMANTISSA; fe[0] = 1.; for (i = 254; i > 0; i--) { /* New x is given by x = f^{-1}(v/x_{i+1} + f(x_{i+1})), thus * need inverse operator of y = exp(-x) -> x = -ln(y) */ x = - log(EXP_SECTION_AREA / x1 + fe[i+1]); ke[i+1] = (ZIGINT)(x / x1 * EMANTISSA); we[i] = x / EMANTISSA; fe[i] = exp (-x); x1 = x; } ke[1] = 0; state[STATE_INITT] = 0; } /* * Here is the guts of the algorithm. As Marsaglia and Tsang state the * algorithm in their paper * * 1) Calculate a random signed integer j and let i be the index * provided by the rightmost 8-bits of j * 2) Set x = j * w_i. If j < k_i return x * 3) If i = 0, then return x from the tail * 4) If [f(x_{i-1}) - f(x_i)] * U < f(x) - f(x_i), return x * 5) goto step 1 * * Where f is the functional form of the distribution, which for a normal * distribution is exp(-0.5*x*x) */ double randmtzig_randn (randmtzig_uint32_t *state, ZIGINT *ki, ZIGINT *ke, double *wi, double *fi, double *we, double *fe) { if (state[STATE_INITT] == 1) create_ziggurat_tables(state, ki, ke, wi, fi, we, fe); while (1) { /* The following code is specialized for 32-bit mantissa. * Compared to the arbitrary mantissa code, there is a performance * gain for 32-bits: PPC: 2%, MIPS: 8%, x86: 40% * There is a bigger performance gain compared to using a full * 53-bit mantissa: PPC: 60%, MIPS: 65%, x86: 240% * Of course, different compilers and operating systems may * have something to do with this. */ #if !defined(ALLBITS) # if HAVE_X86_32 /* 53-bit mantissa, 1-bit sign, x86 32-bit architecture */ double x; int si,idx; register randmtzig_uint32_t lo, hi; randmtzig_int64_t rabs; randmtzig_uint32_t *p = (randmtzig_uint32_t *)&rabs; lo = randi32(state); idx = lo&0xFF; hi = randi32(state); si = hi&UMASK; p[0] = lo; p[1] = hi&0x1FFFFF; x = ( si ? -rabs : rabs ) * wi[idx]; # else /* !HAVE_X86_32 */ /* arbitrary mantissa (selected by NRANDI, with 1 bit for sign) */ const randmtzig_uint64_t r = NRANDI; const randmtzig_int64_t rabs=r>>1; const int idx = (int)(rabs&0xFF); const double x = ( r&1 ? -rabs : rabs) * wi[idx]; # endif /* !HAVE_X86_32 */ if (rabs < (randmtzig_int64_t)ki[idx]) #else /* ALLBITS */ /* 32-bit mantissa */ const randmtzig_uint32_t r = randi32(state); const randmtzig_uint32_t rabs = r&LMASK; const int idx = (int)(r&0xFF); const double x = ((randmtzig_int32_t)r) * wi[idx]; if (rabs < ki[idx]) #endif /* ALLBITS */ return x; /* 99.3% of the time we return here 1st try */ else if (idx == 0) { /* As stated in Marsaglia and Tsang * * For the normal tail, the method of Marsaglia[5] provides: * generate x = -ln(U_1)/r, y = -ln(U_2), until y+y > x*x, * then return r+x. Except that r+x is always in the positive * tail!!!! Any thing random might be used to determine the * sign, but as we already have r we might as well use it * * [PAK] but not the bottom 8 bits, since they are all 0 here! */ double xx, yy; do { xx = - ZIGGURAT_NOR_INV_R * log (RANDU); yy = - log (RANDU); } while ( yy+yy <= xx*xx); return (rabs&0x100 ? -ZIGGURAT_NOR_R-xx : ZIGGURAT_NOR_R+xx); } else if ((fi[idx-1] - fi[idx]) * RANDU + fi[idx] < exp(-0.5*x*x)) return x; } } double randmtzig_rande (randmtzig_uint32_t *state, ZIGINT *ki, ZIGINT *ke, double *wi, double *fi, double *we, double *fe) { if (state[STATE_INITT] == 1) create_ziggurat_tables(state,ki,ke,wi,fi,we,fe); while (1) { ZIGINT ri = ERANDI; const int idx = (int)(ri & 0xFF); const double x = ri * we[idx]; if (ri < ke[idx]) return x; // 98.9% of the time we return here 1st try else if (idx == 0) { /* As stated in Marsaglia and Tsang * * For the exponential tail, the method of Marsaglia[5] provides: * x = r - ln(U); */ return ZIGGURAT_EXP_R - log(RANDU); } else if ((fe[idx-1] - fe[idx]) * RANDU + fe[idx] < exp(-x)) return x; } } /* ---------------------------------------------------------------- */ /* gammln - compute natural log of gamma function of xx */ static double gammln(double xx) { double x,tmp,ser; static double cof[6]={76.18009173,-86.50532033,24.01409822, -1.231739516,0.120858003e-2,-0.536382e-5}; int j; x=xx-1.0; tmp=x+5.5; tmp -= (x+0.5)*log(tmp); ser=1.0; for (j=0;j<=5;j++) { x += 1.0; ser += cof[j]/x; } return -tmp+log(2.50662827465*ser); } /* Choose a random number from a binomial distribution. Based on the bnldev function from Schaefer, Larkum, Sakmann, Roth "Coincidence detection in pyramidal neurons is tuned by their dendritic branching pattern" J. Neurophys. Miki London & Peter N. Steinmetz, Kamran Diba */ double randmtzig_randb(int nnr, double ppr, randmtzig_uint32_t *state) { int j; static int nold=(-1); double am,em,g,angle,p,bnl,sq,bt,y; static double pold=(-1.0),pc,plog,pclog,en,oldg; /* prepare to always ignore errors within this routine */ p=(ppr <= 0.5 ? ppr : 1.0-ppr); am=nnr*p; if (nnr < 25) { bnl=0.0; for (j=1;j<=nnr;j++) if (randmtzig_randu(state) < p) bnl += 1.0; } else if (am < 1.0) { g=exp(-am); bt=1.0; for (j=0;j<=nnr;j++) { bt *= randmtzig_randu (state); if (bt < g) break; } bnl=(j <= nnr ? j : nnr); } else { if (nnr != nold) { en=nnr; oldg=gammln(en+1.0); nold=nnr; } if (p != pold) { pc=1.0-p; plog=log(p); pclog=log(pc); pold=p; } sq=sqrt(2.0*am*pc); do { do { angle=PI*randmtzig_randu (state); angle=PI*randmtzig_randu (state); y=tan(angle); em=sq*y+am; } while (em < 0.0 || em >= (en+1.0)); em=floor(em); bt=1.2*sq*(1.0+y*y)*exp(oldg-gammln(em+1.0) - gammln(en-em+1.0)+em*plog+(en-em)*pclog); } while (randmtzig_randu (state) > bt); bnl=em; } if (p != ppr) bnl=nnr-bnl; return bnl; } #define RUNI(s) randmtzig_randu(s) #define RNOR(s,ki,ke,wi,fi,we,fe) randmtzig_randn(s,ki,ke,wi,fi,we,fe) #define LGAMMA lgamma /* ---- pprsc.c from Stadloeber's winrand --- */ /* flogfak(k) = ln(k!) */ static double flogfak (double k) { #define C0 9.18938533204672742e-01 #define C1 8.33333333333333333e-02 #define C3 -2.77777777777777778e-03 #define C5 7.93650793650793651e-04 #define C7 -5.95238095238095238e-04 static double logfak[30L] = { 0.00000000000000000, 0.00000000000000000, 0.69314718055994531, 1.79175946922805500, 3.17805383034794562, 4.78749174278204599, 6.57925121201010100, 8.52516136106541430, 10.60460290274525023, 12.80182748008146961, 15.10441257307551530, 17.50230784587388584, 19.98721449566188615, 22.55216385312342289, 25.19122118273868150, 27.89927138384089157, 30.67186010608067280, 33.50507345013688888, 36.39544520803305358, 39.33988418719949404, 42.33561646075348503, 45.38013889847690803, 48.47118135183522388, 51.60667556776437357, 54.78472939811231919, 58.00360522298051994, 61.26170176100200198, 64.55753862700633106, 67.88974313718153498, 71.25703896716800901 }; double r, rr; if (k >= 30.0) { r = 1.0 / k; rr = r * r; return ((k + 0.5)*log(k) - k + C0 + r*(C1 + rr*(C3 + rr*(C5 + rr*C7)))); } else return (logfak[(int)k]); } /****************************************************************** * * * Poisson Distribution - Patchwork Rejection/Inversion * * * ****************************************************************** * * * For parameter my < 10 Tabulated Inversion is applied. * * For my >= 10 Patchwork Rejection is employed: * * The area below the histogram function f(x) is rearranged in * * its body by certain point reflections. Within a large center * * interval variates are sampled efficiently by rejection from * * uniform hats. Rectangular immediate acceptance regions speed * * up the generation. The remaining tails are covered by * * exponential functions. * * * ****************************************************************** * * * FUNCTION : - pprsc samples a random number from the Poisson * * distribution with parameter my > 0. * * REFERENCE : - H. Zechner (1994): Efficient sampling from * * continuous and discrete unimodal distributions, * * Doctoral Dissertation, 156 pp., Technical * * University Graz, Austria. * * SUBPROGRAM : - drand(seed) ... (0,1)-Uniform generator with * * unsigned long integer *seed. * * * * Implemented by H. Zechner, January 1994 * * Revised by F. Niederl, July 1994 * * * ******************************************************************/ static double f (double k, double l_nu, double c_pm) { return exp(k * l_nu - flogfak(k) - c_pm); } static double pprsc (double my, randmtzig_uint32_t *state) { static double my_last = -1.0; static double m, k2, k4, k1, k5; static double dl, dr, r1, r2, r4, r5, ll, lr, l_my, c_pm, f1, f2, f4, f5, p1, p2, p3, p4, p5, p6; double Dk, X, Y; double Ds, U, V, W; if (my != my_last) { /* set-up */ my_last = my; /* approximate deviation of reflection points k2, k4 from my - 1/2 */ Ds = sqrt(my + 0.25); /* mode m, reflection points k2 and k4, and points k1 and k5, */ /* which delimit the centre region of h(x) */ m = floor(my); k2 = ceil(my - 0.5 - Ds); k4 = floor(my - 0.5 + Ds); k1 = k2 + k2 - m + 1L; k5 = k4 + k4 - m; /* range width of the critical left and right centre region */ dl = (k2 - k1); dr = (k5 - k4); /* recurrence constants r(k)=p(k)/p(k-1) at k = k1, k2, k4+1, k5+1 */ r1 = my / k1; r2 = my / k2; r4 = my / (k4 + 1.0); r5 = my / (k5 + 1.0); /* reciprocal values of the scale parameters of exp. tail envelope */ ll = log(r1); /* expon. tail left */ lr = -log(r5); /* expon. tail right*/ /* Poisson constants, necessary for computing function values f(k) */ l_my = log(my); c_pm = m * l_my - flogfak(m); /* function values f(k) = p(k)/p(m) at k = k2, k4, k1, k5 */ f2 = f(k2, l_my, c_pm); f4 = f(k4, l_my, c_pm); f1 = f(k1, l_my, c_pm); f5 = f(k5, l_my, c_pm); /* area of the two centre and the two exponential tail regions */ /* area of the two immediate acceptance regions between k2, k4 */ p1 = f2 * (dl + 1.0); /* immed. left */ p2 = f2 * dl + p1; /* centre left */ p3 = f4 * (dr + 1.0) + p2; /* immed. right */ p4 = f4 * dr + p3; /* centre right */ p5 = f1 / ll + p4; /* exp. tail left */ p6 = f5 / lr + p5; /* exp. tail right*/ } for (;;) { /* generate uniform number U -- U(0, p6) */ /* case distinction corresponding to U */ if ((U = RUNI(state) * p6) < p2) { /* centre left */ /* immediate acceptance region R2 = [k2, m) *[0, f2), X = k2, ... m -1 */ if ((V = U - p1) < 0.0) return(k2 + floor(U/f2)); /* immediate acceptance region R1 = [k1, k2)*[0, f1), X = k1, ... k2-1 */ if ((W = V / dl) < f1 ) return(k1 + floor(V/f1)); /* computation of candidate X < k2, and its counterpart Y > k2 */ /* either squeeze-acceptance of X or acceptance-rejection of Y */ Dk = floor(dl * RUNI(state)) + 1.0; if (W <= f2 - Dk * (f2 - f2/r2)) { /* quick accept of */ return(k2 - Dk); /* X = k2 - Dk */ } if ((V = f2 + f2 - W) < 1.0) { /* quick reject of Y*/ Y = k2 + Dk; if (V <= f2 + Dk * (1.0 - f2)/(dl + 1.0)) { /* quick accept of */ return(Y); /* Y = k2 + Dk */ } if (V <= f(Y, l_my, c_pm)) return(Y); /* final accept of Y*/ } X = k2 - Dk; } else if (U < p4) { /* centre right */ /* immediate acceptance region R3 = [m, k4+1)*[0, f4), X = m, ... k4 */ if ((V = U - p3) < 0.0) return(k4 - floor((U - p2)/f4)); /* immediate acceptance region R4 = [k4+1, k5+1)*[0, f5) */ if ((W = V / dr) < f5 ) return(k5 - floor(V/f5)); /* computation of candidate X > k4, and its counterpart Y < k4 */ /* either squeeze-acceptance of X or acceptance-rejection of Y */ Dk = floor(dr * RUNI(state)) + 1.0; if (W <= f4 - Dk * (f4 - f4*r4)) { /* quick accept of */ return(k4 + Dk); /* X = k4 + Dk */ } if ((V = f4 + f4 - W) < 1.0) { /* quick reject of Y*/ Y = k4 - Dk; if (V <= f4 + Dk * (1.0 - f4)/ dr) { /* quick accept of */ return(Y); /* Y = k4 - Dk */ } if (V <= f(Y, l_my, c_pm)) return(Y); /* final accept of Y*/ } X = k4 + Dk; } else { W = RUNI(state); if (U < p5) { /* expon. tail left */ Dk = floor(1.0 - log(W)/ll); if ((X = k1 - Dk) < 0L) continue; /* 0 <= X <= k1 - 1 */ W *= (U - p4) * ll; /* W -- U(0, h(x)) */ if (W <= f1 - Dk * (f1 - f1/r1)) return(X); /* quick accept of X*/ } else { /* expon. tail right*/ Dk = floor(1.0 - log(W)/lr); X = k5 + Dk; /* X >= k5 + 1 */ W *= (U - p5) * lr; /* W -- U(0, h(x)) */ if (W <= f5 - Dk * (f5 - f5*r5)) return(X); /* quick accept of X*/ } } /* acceptance-rejection test of candidate X from the original area */ /* test, whether W <= f(k), with W = U*h(x) and U -- U(0, 1)*/ /* log f(X) = (X - m)*log(my) - log X! + log m! */ if (log(W) <= X * l_my - flogfak(X) - c_pm) return(X); } } /* ---- pprsc.c end ------ */ /* The remainder of the file is by Paul Kienzle */ /* Given uniform u, find x such that CDF(L,x)==u. Return x. */ static void poisson_cdf_lookup(double lambda, double *p, size_t n, randmtzig_uint32_t *state) { /* Table size is predicated on the maximum value of lambda * we want to store in the table, and the maximum value of * returned by the uniform random number generator on [0,1). * With lambda==10 and u_max = 1 - 1/(2^32+1), we * have poisson_pdf(lambda,36) < 1-u_max. If instead our * generator uses more bits of mantissa or returns a value * in the range [0,1], then for lambda==10 we need a table * size of 46 instead. For long doubles, the table size * will need to be longer still. */ #define TABLESIZE 46 double t[TABLESIZE]; /* Precompute the table for the u up to and including 0.458. * We will almost certainly need it. */ int intlambda = (int)floor(lambda); double P; int tableidx; size_t i = n; t[0] = P = exp(-lambda); for (tableidx = 1; tableidx <= intlambda; tableidx++) { P = P*lambda/(double)tableidx; t[tableidx] = t[tableidx-1] + P; } while (i-- > 0) { double u = RUNI(state); /* If u > 0.458 we know we can jump to floor(lambda) before * comparing (this observation is based on Stadlober's winrand * code). For lambda >= 1, this will be a win. Lambda < 1 * is already fast, so adding an extra comparison is not a * problem. */ int k = (u > 0.458 ? intlambda : 0); /* We aren't using a for loop here because when we find the * right k we want to jump to the next iteration of the * outer loop, and the continue statement will only work for * the inner loop. */ nextk: if ( u <= t[k] ) { p[i] = (double) k; continue; } if (++k < tableidx) goto nextk; /* We only need high values of the table very rarely so we * don't automatically compute the entire table. */ while (tableidx < TABLESIZE) { P = P*lambda/(double)tableidx; t[tableidx] = t[tableidx-1] + P; /* Make sure we converge to 1.0 just in case u is uniform * on [0,1] rather than [0,1). */ if (t[tableidx] == t[tableidx-1]) t[tableidx] = 1.0; tableidx++; if (u <= t[tableidx-1]) break; } /* We are assuming that the table size is big enough here. * This should be true even if RUNI is returning values in * the range [0,1] rather than [0,1). */ p[i] = (double)(tableidx-1); } } /* From Press, et al., Numerical Recipes */ static void poisson_rejection (double lambda, double *p, size_t n, randmtzig_uint32_t *state) { double sq = sqrt(2.0*lambda); double alxm = log(lambda); double g = lambda*alxm - LGAMMA(lambda+1.0); size_t i; for (i = 0; i < n; i++) { double y, em, t; do { do { y = tan(M_PI*RUNI(state)); em = sq * y + lambda; } while (em < 0.0); em = floor(em); t = 0.9*(1.0+y*y)*exp(em*alxm-flogfak(em)-g); } while (RUNI(state) > t); p[i] = em; } } /* Generate one poisson variate */ double randmtzig_randp (double L, randmtzig_uint32_t *state, ZIGINT *ki, ZIGINT *ke, double *wi, double *fi, double *we, double *fe) { double ret; if (L < 0.0) ret = NAN; else if (L <= 12.0) { /* From Press, et al. Numerical recipes */ double g = exp(-L); int em = -1; double t = 1.0; do { ++em; t *= RUNI(state); } while (t > g); ret = em; } else if (L <= 1e8) { /* numerical recipes */ poisson_rejection(L, &ret, 1,state); } else if (isinf(L)) { /* FIXME R uses NaN, but the normal approx. suggests that as * limit should be inf. Which is correct? */ ret = NAN; } else { /* normal approximation: from Phys. Rev. D (1994) v50 p1284 */ ret = floor(RNOR(state,ki,ke,wi,fi,we,fe)*sqrt(L) + L + 0.5); if (ret < 0.0) ret = 0.0; /* will probably never happen */ } return ret; } /* Array generators */ void randmtzig_fill_drandu (randmtzig_idx_type n, double *p, randmtzig_uint32_t *state) { randmtzig_idx_type i; for (i = 0; i < n; i++) p[i] = randmtzig_randu(state); } void randmtzig_fill_drandn (randmtzig_idx_type n, double *p, randmtzig_uint32_t *state, ZIGINT *ki, ZIGINT *ke, double *wi, double *fi, double *we, double *fe) { randmtzig_idx_type i; for (i = 0; i < n; i++) p[i] = randmtzig_randn(state,ki,ke,wi,fi,we,fe); } void randmtzig_fill_drande (randmtzig_idx_type n, double *p, randmtzig_uint32_t *state, ZIGINT *ki, ZIGINT *ke, double *wi, double *fi, double *we, double *fe) { randmtzig_idx_type i; for (i = 0; i < n; i++) p[i] = randmtzig_rande(state,ki,ke,wi,fi,we,fe); } void randmtzig_fill_drandb (int nnr, double ppr, randmtzig_idx_type n, double *v, randmtzig_uint32_t *state) { randmtzig_idx_type i; for (i = 0; i < n; i++) v[i] = randmtzig_randb(nnr, ppr, state); } void randmtzig_fill_srandu (randmtzig_idx_type n, float *p, randmtzig_uint32_t *state) { randmtzig_idx_type i; for (i = 0; i < n; i++) p[i] = randmtzig_randu(state); } void randmtzig_fill_srandn (randmtzig_idx_type n, float *p, randmtzig_uint32_t *state, ZIGINT *ki, ZIGINT *ke, double *wi, double *fi, double *we, double *fe) { randmtzig_idx_type i; for (i = 0; i < n; i++) p[i] = randmtzig_randn(state,ki,ke,wi,fi,we,fe); } void randmtzig_fill_srande (randmtzig_idx_type n, float *p, randmtzig_uint32_t *state, ZIGINT *ki, ZIGINT *ke, double *wi, double *fi, double *we, double *fe) { randmtzig_idx_type i; for (i = 0; i < n; i++) p[i] = randmtzig_rande(state,ki,ke,wi,fi,we,fe); } void randmtzig_fill_srandb (int nnr, float ppr, randmtzig_idx_type n, float *v, randmtzig_uint32_t *state) { randmtzig_idx_type i; for (i = 0; i < n; i++) v[i] = randmtzig_randb(nnr, ppr, state); } /* The cutoff of L <= 1e8 in the following two functions before using * the normal approximation is based on: * > L=1e8; x=floor(linspace(0,2*L,1000)); * > max(abs(normal_pdf(x,L,L)-poisson_pdf(x,L))) * ans = 1.1376e-28 * For L=1e7, the max is around 1e-9, which is within the step size of RUNI. * For L>1e10 the pprsc function breaks down, as I saw from the histogram * of a large sample, so 1e8 is both small enough and large enough. */ /* Generate a set of poisson numbers with the same distribution */ void randmtzig_fill_drandp (double L, randmtzig_idx_type n, double *p, randmtzig_uint32_t *state, ZIGINT *ki, ZIGINT *ke, double *wi, double *fi, double *we, double *fe) { randmtzig_idx_type i; if (L < 0.0 || isinf(L)) { for (i=0; i 0) { float u = RUNI(state); /* If u > 0.458 we know we can jump to floor(lambda) before * comparing (this observation is based on Stadlober's winrand * code). For lambda >= 1, this will be a win. Lambda < 1 * is already fast, so adding an extra comparison is not a * problem. */ int k = (u > 0.458 ? intlambda : 0); /* We aren't using a for loop here because when we find the * right k we want to jump to the next iteration of the * outer loop, and the continue statement will only work for * the inner loop. */ nextk: if ( u <= t[k] ) { p[i] = (float) k; continue; } if (++k < tableidx) goto nextk; /* We only need high values of the table very rarely so we * don't automatically compute the entire table. */ while (tableidx < TABLESIZE) { P = P*lambda/(float)tableidx; t[tableidx] = t[tableidx-1] + P; /* Make sure we converge to 1.0 just in case u is uniform * on [0,1] rather than [0,1). */ if (t[tableidx] == t[tableidx-1]) t[tableidx] = 1.0; tableidx++; if (u <= t[tableidx-1]) break; } /* We are assuming that the table size is big enough here. * This should be true even if RUNI is returning values in * the range [0,1] rather than [0,1). */ p[i] = (float)(tableidx-1); } } void randmtzig_fill_srandp (double L, randmtzig_idx_type n, float *p, randmtzig_uint32_t *state, ZIGINT *ki, ZIGINT *ke, double *wi, double *fi, double *we, double *fe) { randmtzig_idx_type i; if (L < 0.0 || isinf(L)) { for (i=0; i