5742
|
1 /* |
|
2 A C-program for MT19937, with initialization improved 2002/2/10. |
|
3 Coded by Takuji Nishimura and Makoto Matsumoto. |
|
4 This is a faster version by taking Shawn Cokus's optimization, |
|
5 Matthew Bellew's simplification, Isaku Wada's real version. |
|
6 David Bateman added normal and exponential distributions following |
|
7 Marsaglia and Tsang's Ziggurat algorithm. |
|
8 |
|
9 Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura, |
|
10 Copyright (C) 2004, David Bateman |
|
11 All rights reserved. |
|
12 |
|
13 Redistribution and use in source and binary forms, with or without |
|
14 modification, are permitted provided that the following conditions |
|
15 are met: |
|
16 |
|
17 1. Redistributions of source code must retain the above copyright |
|
18 notice, this list of conditions and the following disclaimer. |
|
19 |
|
20 2. Redistributions in binary form must reproduce the above copyright |
|
21 notice, this list of conditions and the following disclaimer in the |
|
22 documentation and/or other materials provided with the distribution. |
|
23 |
|
24 3. The names of its contributors may not be used to endorse or promote |
|
25 products derived from this software without specific prior written |
|
26 permission. |
|
27 |
|
28 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
|
29 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
|
30 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
|
31 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER |
|
32 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
|
33 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
|
34 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
|
35 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF |
|
36 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
|
37 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
|
38 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|
39 |
|
40 |
|
41 Any feedback is very welcome. |
|
42 http://www.math.keio.ac.jp/matumoto/emt.html |
|
43 email: matumoto@math.keio.ac.jp |
|
44 |
|
45 * 2006-04-01 David Bateman |
|
46 * * convert for use in octave, declaring static functions only used |
|
47 * here and adding oct_ to functions visible externally |
|
48 * * inverse sense of ALLBITS |
|
49 * 2004-01-19 Paul Kienzle |
|
50 * * comment out main |
|
51 * add init_by_entropy, get_state, set_state |
|
52 * * converted to allow compiling by C++ compiler |
|
53 * |
|
54 * 2004-01-25 David Bateman |
|
55 * * Add Marsaglia and Tsang Ziggurat code |
|
56 * |
|
57 * 2004-07-13 Paul Kienzle |
|
58 * * make into an independent library with some docs. |
|
59 * * introduce new main and test code. |
|
60 * |
|
61 * 2004-07-28 Paul Kienzle & David Bateman |
|
62 * * add -DALLBITS flag for 32 vs. 53 bits of randomness in mantissa |
|
63 * * make the naming scheme more uniform |
|
64 * * add -DHAVE_X86 for faster support of 53 bit mantissa on x86 arch. |
|
65 * |
|
66 * 2005-02-23 Paul Kienzle |
|
67 * * fix -DHAVE_X86_32 flag and add -DUSE_X86_32=0|1 for explicit control |
|
68 */ |
|
69 |
|
70 /* |
|
71 === Build instructions === |
|
72 |
|
73 Compile with -DHAVE_GETTIMEOFDAY if the gettimeofday function is |
|
74 available. This is not necessary if your architecture has |
|
75 /dev/urandom defined. |
|
76 |
|
77 Compile with -DALLBITS to use 32-bit instead of 53-bit random numbers. |
|
78 The default 53-bit mode is about 50% slower than the 32-bit mode. |
|
79 |
|
80 Uses implicit -Di386 or explicit -DHAVE_X86_32 to determine if CPU=x86. |
|
81 You can force X86 behaviour with -DUSE_X86_32=1, or suppress it with |
|
82 -DUSE_X86_32=0. You should also consider -march=i686 or similar for |
|
83 extra performance. Check whether -DUSE_X86_32=0 is faster on 64-bit |
|
84 x86 architectures. |
|
85 |
|
86 If you want to replace the Mersenne Twister with another |
|
87 generator then redefine randi32 appropriately. |
|
88 |
|
89 === Usage instructions === |
|
90 Before using any of the generators, initialize the state with one of |
|
91 oct_init_by_int, oct_init_by_array or oct_init_by_entropy. |
|
92 |
|
93 All generators share the same state vector. |
|
94 |
|
95 === Mersenne Twister === |
|
96 void oct_init_by_int(uint32_t s) 32-bit initial state |
|
97 void oct_init_by_array(uint32_t k[],int m) m*32-bit initial state |
|
98 void oct_init_by_entropy(void) random initial state |
|
99 void oct_get_state(uint32_t save[MT_N+1]) saves state in array |
|
100 void oct_set_state(uint32_t save[MT_N+1]) restores state from array |
5766
|
101 static uint32_t randmt(void) returns 32-bit unsigned int |
5742
|
102 |
|
103 === inline generators === |
5766
|
104 static uint32_t randi32(void) returns 32-bit unsigned int |
|
105 static uint64_t randi53(void) returns 53-bit unsigned int |
|
106 static uint64_t randi54(void) returns 54-bit unsigned int |
|
107 static uint64_t randi64(void) returns 64-bit unsigned int |
|
108 static double randu32(void) returns 32-bit uniform in (0,1) |
|
109 static double randu53(void) returns 53-bit uniform in (0,1) |
5742
|
110 |
|
111 double oct_randu(void) returns M-bit uniform in (0,1) |
|
112 double oct_randn(void) returns M-bit standard normal |
|
113 double oct_rande(void) returns N-bit standard exponential |
|
114 |
|
115 === Array generators === |
|
116 void oct_fill_randi32(octave_idx_type, uint32_t []) |
|
117 void oct_fill_randi64(octave_idx_type, uint64_t []) |
|
118 void oct_fill_randu(octave_idx_type, double []) |
|
119 void oct_fill_randn(octave_idx_type, double []) |
|
120 void oct_fill_rande(octave_idx_type, double []) |
|
121 |
|
122 */ |
|
123 |
|
124 #if defined (HAVE_CONFIG_H) |
|
125 #include <config.h> |
|
126 #endif |
|
127 |
|
128 #include <math.h> |
|
129 #include <stdio.h> |
|
130 #include <time.h> |
|
131 |
|
132 #ifdef HAVE_GETTIMEOFDAY |
|
133 #include <sys/time.h> |
|
134 #endif |
|
135 |
|
136 #include "randmtzig.h" |
|
137 |
|
/* XXX FIXME XXX may want to suppress X86 if sizeof(long)>4 */
/* Derive USE_X86_32 unless the builder set it explicitly: it becomes 1
   when either i386 or HAVE_X86_32 is defined and 0 otherwise.  */
#ifndef USE_X86_32
# if defined (HAVE_X86_32) || defined (i386)
#  define USE_X86_32 1
# else
#  define USE_X86_32 0
# endif
#endif
|
146 |
|
147 /* ===== Mersenne Twister 32-bit generator ===== */ |
|
148 |
|
149 #define MT_M 397 |
|
150 #define MATRIX_A 0x9908b0dfUL /* constant vector a */ |
|
151 #define UMASK 0x80000000UL /* most significant w-r bits */ |
|
152 #define LMASK 0x7fffffffUL /* least significant r bits */ |
|
153 #define MIXBITS(u,v) ( ((u) & UMASK) | ((v) & LMASK) ) |
|
154 #define TWIST(u,v) ((MIXBITS(u,v) >> 1) ^ ((v)&1UL ? MATRIX_A : 0UL)) |
|
155 |
|
156 static uint32_t *next; |
|
157 static uint32_t state[MT_N]; /* the array for the state vector */ |
|
158 static int left = 1; |
|
159 static int initf = 0; |
|
160 static int initt = 1; |
|
161 |
|
162 /* initializes state[MT_N] with a seed */ |
|
163 void |
|
164 oct_init_by_int (uint32_t s) |
|
165 { |
|
166 int j; |
|
167 state[0] = s & 0xffffffffUL; |
|
168 for (j = 1; j < MT_N; j++) { |
|
169 state[j] = (1812433253UL * (state[j-1] ^ (state[j-1] >> 30)) + j); |
|
170 /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */ |
|
171 /* In the previous versions, MSBs of the seed affect */ |
|
172 /* only MSBs of the array state[]. */ |
|
173 /* 2002/01/09 modified by Makoto Matsumoto */ |
|
174 state[j] &= 0xffffffffUL; /* for >32 bit machines */ |
|
175 } |
|
176 left = 1; |
|
177 initf = 1; |
|
178 } |
|
179 |
|
180 /* initialize by an array with array-length */ |
|
181 /* init_key is the array for initializing keys */ |
|
182 /* key_length is its length */ |
|
183 void |
|
184 oct_init_by_array (uint32_t init_key[], int key_length) |
|
185 { |
|
186 int i, j, k; |
|
187 oct_init_by_int (19650218UL); |
|
188 i = 1; |
|
189 j = 0; |
|
190 k = (MT_N > key_length ? MT_N : key_length); |
|
191 for (; k; k--) |
|
192 { |
|
193 state[i] = (state[i] ^ ((state[i-1] ^ (state[i-1] >> 30)) * 1664525UL)) |
|
194 + init_key[j] + j; /* non linear */ |
|
195 state[i] &= 0xffffffffUL; /* for WORDSIZE > 32 machines */ |
|
196 i++; |
|
197 j++; |
|
198 if (i >= MT_N) |
|
199 { |
|
200 state[0] = state[MT_N-1]; |
|
201 i = 1; |
|
202 } |
|
203 if (j >= key_length) |
|
204 j = 0; |
|
205 } |
|
206 for (k = MT_N - 1; k; k--) |
|
207 { |
|
208 state[i] = (state[i] ^ ((state[i-1] ^ (state[i-1] >> 30)) * 1566083941UL)) |
|
209 - i; /* non linear */ |
|
210 state[i] &= 0xffffffffUL; /* for WORDSIZE > 32 machines */ |
|
211 i++; |
|
212 if (i >= MT_N) |
|
213 { |
|
214 state[0] = state[MT_N-1]; |
|
215 i = 1; |
|
216 } |
|
217 } |
|
218 |
|
219 state[0] = 0x80000000UL; /* MSB is 1; assuring non-zero initial array */ |
|
220 left = 1; |
|
221 initf = 1; |
|
222 } |
|
223 |
|
224 void |
|
225 oct_init_by_entropy (void) |
|
226 { |
|
227 uint32_t entropy[MT_N]; |
|
228 int n = 0; |
|
229 |
|
230 /* Look for entropy in /dev/urandom */ |
|
231 FILE* urandom =fopen("/dev/urandom", "rb"); |
|
232 if (urandom) |
|
233 { |
|
234 while (n < MT_N) |
|
235 { |
|
236 unsigned char word[4]; |
|
237 if (fread(word, 4, 1, urandom) != 1) |
|
238 break; |
|
239 entropy[n++] = word[0]+(word[1]<<8)+(word[2]<<16)+(word[3]<<24); |
|
240 } |
|
241 fclose(urandom); |
|
242 } |
|
243 |
|
244 /* If there isn't enough entropy, gather some from various sources */ |
|
245 if (n < MT_N) |
|
246 entropy[n++] = time(NULL); /* Current time in seconds */ |
|
247 if (n < MT_N) |
|
248 entropy[n++] = clock(); /* CPU time used (usec) */ |
|
249 #ifdef HAVE_GETTIMEOFDAY |
|
250 if (n < MT_N) |
|
251 { |
|
252 struct timeval tv; |
|
253 if (gettimeofday(&tv, NULL) != -1) |
|
254 entropy[n++] = tv.tv_usec; /* Fractional part of current time */ |
|
255 } |
|
256 #endif |
|
257 /* Send all the entropy into the initial state vector */ |
|
258 oct_init_by_array(entropy,n); |
|
259 } |
|
260 |
|
261 void |
|
262 oct_set_state (uint32_t save[]) |
|
263 { |
|
264 int i; |
|
265 for (i=0; i < MT_N; i++) |
|
266 state[i] = save[i]; |
|
267 left = save[MT_N]; |
|
268 next = state + (MT_N - left + 1); |
|
269 } |
|
270 |
|
271 void |
|
272 oct_get_state (uint32_t save[]) |
|
273 { |
|
274 int i; |
|
275 for (i = 0; i < MT_N; i++) |
|
276 save[i] = state[i]; |
|
277 save[MT_N] = left; |
|
278 } |
|
279 |
|
280 static void |
|
281 next_state (void) |
|
282 { |
|
283 uint32_t *p = state; |
|
284 int j; |
|
285 |
|
286 /* if init_by_int() has not been called, */ |
|
287 /* a default initial seed is used */ |
|
288 /* if (initf==0) init_by_int(5489UL); */ |
|
289 /* Or better yet, a random seed! */ |
|
290 if (initf == 0) |
|
291 oct_init_by_entropy(); |
|
292 |
|
293 left = MT_N; |
|
294 next = state; |
|
295 |
|
296 for (j = MT_N - MT_M + 1; --j; p++) |
|
297 *p = p[MT_M] ^ TWIST(p[0], p[1]); |
|
298 |
|
299 for (j = MT_M; --j; p++) |
|
300 *p = p[MT_M-MT_N] ^ TWIST(p[0], p[1]); |
|
301 |
|
302 *p = p[MT_M-MT_N] ^ TWIST(p[0], state[0]); |
|
303 } |
|
304 |
|
305 /* generates a random number on [0,0xffffffff]-interval */ |
5766
|
306 static uint32_t |
5742
|
307 randmt (void) |
|
308 { |
|
309 register uint32_t y; |
|
310 |
|
311 if (--left == 0) |
|
312 next_state(); |
|
313 y = *next++; |
|
314 |
|
315 /* Tempering */ |
|
316 y ^= (y >> 11); |
|
317 y ^= (y << 7) & 0x9d2c5680UL; |
|
318 y ^= (y << 15) & 0xefc60000UL; |
|
319 return (y ^ (y >> 18)); |
|
320 } |
|
321 |
|
/* ===== Uniform generators ===== */

/* Select which 32 bit generator to use */
#define randi32 randmt

/* Return a 53-bit unsigned integer built from two 32-bit draws: the first
   draw supplies the low 32 bits, the low 21 bits of the second draw supply
   bits 32..52.

   The value is assembled with a shift and an OR on every platform.  The
   former HAVE_X86_32 variant stored the halves through a uint32_t pointer
   aimed at a uint64_t, which violates the C aliasing rules; on
   little-endian x86 it produced exactly the value computed here.  */
static uint64_t
randi53 (void)
{
  const uint32_t lo = randi32 ();
  const uint32_t hi = randi32 () & 0x1FFFFF;

  return ((uint64_t) hi << 32) | lo;
}
|
342 |
5766
|
/* Return a 54-bit unsigned integer built from two 32-bit draws: the first
   draw supplies the low 32 bits, the low 22 bits of the second draw supply
   bits 32..53.

   The value is assembled with a shift and an OR on every platform.  The
   former HAVE_X86_32 variant stored the halves through a uint32_t pointer
   aimed at a uint64_t, which violates the C aliasing rules; on
   little-endian x86 it produced exactly the value computed here.  */
static uint64_t
randi54 (void)
{
  const uint32_t lo = randi32 ();
  const uint32_t hi = randi32 () & 0x3FFFFF;

  return ((uint64_t) hi << 32) | lo;
}
|
358 |
5766
|
/* Return a 64-bit unsigned integer built from two 32-bit draws: the first
   draw supplies the low 32 bits and the second the high 32 bits.

   The value is assembled with a shift and an OR on every platform.  The
   former HAVE_X86_32 variant stored the halves through a uint32_t pointer
   aimed at a uint64_t, which violates the C aliasing rules; on
   little-endian x86 it produced exactly the value computed here.  */
static uint64_t
randi64 (void)
{
  const uint32_t lo = randi32 ();
  const uint32_t hi = randi32 ();

  return ((uint64_t) hi << 32) | lo;
}
|
374 |
|
/* Return a uniform double on the open interval (0,1) with 32 bits of
   resolution.  The draw is offset by one half before scaling by 2^-32,
   so neither endpoint can be produced.  */
static double
randu32 (void)
{
  const double shifted = (double) randi32 () + 0.5;

  /* divided by 2^32 */
  return shifted * (1.0/4294967296.0);
}
|
382 |
|
/* Return a uniform double on (0,1) with 53-bit resolution.

   Two draws are combined: the top 27 bits of the first form the high
   part and the top 26 bits of the second form the low part of a 53-bit
   integer, which is offset by 0.4 and scaled by 2^-53.  */
static double
randu53 (void)
{
  const uint32_t high27 = randi32 () >> 5;
  const uint32_t low26 = randi32 () >> 6;

  /* 67108864 = 2^26, 9007199254740992 = 2^53 */
  return (high27 * 67108864.0 + low26 + 0.4) * (1.0/9007199254740992.0);
}
|
391 |
|
/* Return a uniform double on (0,1).  The mantissa width is chosen at
   compile time: 32 random bits when ALLBITS is defined, 53 otherwise.  */
double
oct_randu (void)
{
#ifdef ALLBITS
  return randu32 ();
#else
  return randu53 ();
#endif
}
|
406 |
|
/* ===== Ziggurat normal and exponential generators ===== */

/* Integer type, mantissa scale and integer/uniform sources used by the
   ziggurat code.  The E* names serve the exponential generator and the
   N* names the normal generator, which gives up one bit for the sign.  */
#if defined (ALLBITS)
# define ZIGINT uint32_t
# define EMANTISSA 4294967296.0  /* 32 bit mantissa */
# define ERANDI randi32()        /* 32 bits for mantissa */
# define NMANTISSA 2147483648.0  /* 31 bit mantissa */
# define NRANDI randi32()        /* 31 bits for mantissa + 1 bit sign */
# define RANDU randu32()
#else
# define ZIGINT uint64_t
# define EMANTISSA 9007199254740992.0  /* 53 bit mantissa */
# define ERANDI randi53()              /* 53 bits for mantissa */
# define NMANTISSA EMANTISSA
# define NRANDI randi54()              /* 53 bits for mantissa + 1 bit sign */
# define RANDU randu53()
#endif

/* Number of ziggurat layers; layers are selected with an 8-bit index.  */
#define ZIGGURAT_TABLE_SIZE 256

/* Normal distribution: start of the tail, its reciprocal, and the area
   of each ziggurat section.  */
#define ZIGGURAT_NOR_R 3.6541528853610088
#define ZIGGURAT_NOR_INV_R 0.27366123732975828
#define NOR_SECTION_AREA 0.00492867323399

/* Exponential distribution: start of the tail, its reciprocal, and the
   area of each ziggurat section.  */
#define ZIGGURAT_EXP_R 7.69711747013104972
#define ZIGGURAT_EXP_INV_R 0.129918765548341586
#define EXP_SECTION_AREA 0.0039496598225815571993

/* Per-layer tables, filled by create_ziggurat_tables():
   k = fast-accept threshold, w = scale from integer to x, f = density
   at the layer edge.  The i suffix is the normal distribution, e the
   exponential.  */
static ZIGINT ki[ZIGGURAT_TABLE_SIZE];
static double wi[ZIGGURAT_TABLE_SIZE], fi[ZIGGURAT_TABLE_SIZE];
static ZIGINT ke[ZIGGURAT_TABLE_SIZE];
static double we[ZIGGURAT_TABLE_SIZE], fe[ZIGGURAT_TABLE_SIZE];
|
438 |
|
439 /* |
|
440 This code is based on the paper Marsaglia and Tsang, "The ziggurat method |
|
441 for generating random variables", Journ. Statistical Software. Code was |
|
442 presented in this paper for a Ziggurat of 127 levels and using a 32 bit |
|
443 integer random number generator. This version of the code, uses the |
|
444 Mersenne Twister as the integer generator and uses 256 levels in the |
|
445 Ziggurat. This has several advantages. |
|
446 |
|
447 1) As Marsaglia and Tsang themselves state, the more levels the fewer |
|
448 times the expensive tail algorithm must be called |
|
449 2) The cycle time of the generator is determined by the integer |
|
450 generator, thus the use of a Mersenne Twister for the core random |
|
451 generator makes this cycle extremely long. |
|
452 3) The license on the original code was unclear, thus rewriting the code |
|
453 from the article means we are free of copyright issues. |
|
454 4) Compile flag for full 53-bit random mantissa. |
|
455 |
|
456 It should be stated that the authors made my life easier, by the fact that |
|
457 the algorithm developed in the text of the article is for a 256 level |
|
458 ziggurat, even if the code itself isn't... |
|
459 |
|
460 One modification to the algorithm developed in the article, is that it is |
|
461 assumed that 0 <= x < Inf, and "unsigned long"s are used, thus resulting in |
|
462 terms like 2^32 in the code. As the normal distribution is defined between |
|
463 -Inf < x < Inf, we effectively only have 31 bit integers plus a sign. Thus |
|
464 in Marsaglia and Tsang, terms like 2^32 become 2^31. We use NMANTISSA for |
|
465 this term. The exponential distribution is one sided so we use the |
|
466 full 32 bits. We use EMANTISSA for this term. |
|
467 |
|
468 It appears that I'm slightly slower than the code in the article, this |
|
469 is partially due to a better generator of random integers than they |
|
470 use. But might also be that the case of rapid return was optimized by |
|
471 inlining the relevant code with a #define. As the basic Mersenne |
|
472 Twister is only 25% faster than this code I suspect that the main |
|
473 reason is just the use of the Mersenne Twister and not the inlining, |
|
474 so I'm not going to try and optimize further. |
|
475 */ |
|
476 |
|
477 static void |
|
478 create_ziggurat_tables (void) |
|
479 { |
|
480 int i; |
|
481 double x, x1; |
|
482 |
|
483 /* Ziggurat tables for the normal distribution */ |
|
484 x1 = ZIGGURAT_NOR_R; |
|
485 wi[255] = x1 / NMANTISSA; |
|
486 fi[255] = exp (-0.5 * x1 * x1); |
|
487 |
|
488 /* Index zero is special for tail strip, where Marsaglia and Tsang |
|
489 * defines this as |
|
490 * k_0 = 2^31 * r * f(r) / v, w_0 = 0.5^31 * v / f(r), f_0 = 1, |
|
491 * where v is the area of each strip of the ziggurat. |
|
492 */ |
|
493 ki[0] = (ZIGINT) (x1 * fi[255] / NOR_SECTION_AREA * NMANTISSA); |
|
494 wi[0] = NOR_SECTION_AREA / fi[255] / NMANTISSA; |
|
495 fi[0] = 1.; |
|
496 |
|
497 for (i = 254; i > 0; i--) |
|
498 { |
|
499 /* New x is given by x = f^{-1}(v/x_{i+1} + f(x_{i+1})), thus |
|
500 * need inverse operator of y = exp(-0.5*x*x) -> x = sqrt(-2*ln(y)) |
|
501 */ |
|
502 x = sqrt(-2. * log(NOR_SECTION_AREA / x1 + fi[i+1])); |
|
503 ki[i+1] = (ZIGINT)(x / x1 * NMANTISSA); |
|
504 wi[i] = x / NMANTISSA; |
|
505 fi[i] = exp (-0.5 * x * x); |
|
506 x1 = x; |
|
507 } |
|
508 |
|
509 ki[1] = 0; |
|
510 |
|
511 /* Zigurrat tables for the exponential distribution */ |
|
512 x1 = ZIGGURAT_EXP_R; |
|
513 we[255] = x1 / EMANTISSA; |
|
514 fe[255] = exp (-x1); |
|
515 |
|
516 /* Index zero is special for tail strip, where Marsaglia and Tsang |
|
517 * defines this as |
|
518 * k_0 = 2^32 * r * f(r) / v, w_0 = 0.5^32 * v / f(r), f_0 = 1, |
|
519 * where v is the area of each strip of the ziggurat. |
|
520 */ |
|
521 ke[0] = (ZIGINT) (x1 * fe[255] / EXP_SECTION_AREA * EMANTISSA); |
|
522 we[0] = EXP_SECTION_AREA / fe[255] / EMANTISSA; |
|
523 fe[0] = 1.; |
|
524 |
|
525 for (i = 254; i > 0; i--) |
|
526 { |
|
527 /* New x is given by x = f^{-1}(v/x_{i+1} + f(x_{i+1})), thus |
|
528 * need inverse operator of y = exp(-x) -> x = -ln(y) |
|
529 */ |
|
530 x = - log(EXP_SECTION_AREA / x1 + fe[i+1]); |
|
531 ke[i+1] = (ZIGINT)(x / x1 * EMANTISSA); |
|
532 we[i] = x / EMANTISSA; |
|
533 fe[i] = exp (-x); |
|
534 x1 = x; |
|
535 } |
|
536 ke[1] = 0; |
|
537 |
|
538 initt = 0; |
|
539 } |
|
540 |
|
541 /* |
|
542 * Here is the guts of the algorithm. As Marsaglia and Tsang state the |
|
543 * algorithm in their paper |
|
544 * |
|
545 * 1) Calculate a random signed integer j and let i be the index |
|
546 * provided by the rightmost 8-bits of j |
|
547 * 2) Set x = j * w_i. If j < k_i return x |
|
548 * 3) If i = 0, then return x from the tail |
|
549 * 4) If [f(x_{i-1}) - f(x_i)] * U < f(x) - f(x_i), return x |
|
550 * 5) goto step 1 |
|
551 * |
|
552 * Where f is the functional form of the distribution, which for a normal |
|
553 * distribution is exp(-0.5*x*x) |
|
554 */ |
|
555 |
|
556 double |
|
557 oct_randn (void) |
|
558 { |
|
559 if (initt) |
|
560 create_ziggurat_tables(); |
|
561 |
|
562 while (1) |
|
563 { |
|
564 /* The following code is specialized for 32-bit mantissa. |
|
565 * Compared to the arbitrary mantissa code, there is a performance |
|
566 * gain for 32-bits: PPC: 2%, MIPS: 8%, x86: 40% |
|
567 * There is a bigger performance gain compared to using a full |
|
568 * 53-bit mantissa: PPC: 60%, MIPS: 65%, x86: 240% |
|
569 * Of course, different compilers and operating systems may |
|
570 * have something to do with this. |
|
571 */ |
|
572 #if !defined(ALLBITS) |
|
573 # if HAVE_X86_32 |
|
574 /* 53-bit mantissa, 1-bit sign, x86 32-bit architecture */ |
|
575 double x; |
|
576 int si,idx; |
|
577 register uint32_t lo, hi; |
|
578 int64_t rabs; |
|
579 uint32_t *p = (uint32_t *)&rabs; |
|
580 lo = randi32(); |
|
581 idx = lo&0xFF; |
|
582 hi = randi32(); |
|
583 si = hi&UMASK; |
|
584 p[0] = lo; |
|
585 p[1] = hi&0x1FFFFF; |
|
586 x = ( si ? -rabs : rabs ) * wi[idx]; |
|
587 # else /* !HAVE_X86_32 */ |
|
588 /* arbitrary mantissa (selected by NRANDI, with 1 bit for sign) */ |
|
589 const uint64_t r = NRANDI; |
|
590 const int64_t rabs=r>>1; |
|
591 const int idx = (int)(rabs&0xFF); |
|
592 const double x = ( r&1 ? -rabs : rabs) * wi[idx]; |
|
593 # endif /* !HAVE_X86_32 */ |
|
594 if (rabs < (int64_t)ki[idx]) |
|
595 #else /* ALLBITS */ |
|
596 /* 32-bit mantissa */ |
|
597 const uint32_t r = randi32(); |
|
598 const uint32_t rabs = r&LMASK; |
|
599 const int idx = (int)(r&0xFF); |
|
600 const double x = ((int32_t)r) * wi[idx]; |
|
601 if (rabs < ki[idx]) |
|
602 #endif /* ALLBITS */ |
|
603 return x; /* 99.3% of the time we return here 1st try */ |
|
604 else if (idx == 0) |
|
605 { |
|
606 /* As stated in Marsaglia and Tsang |
|
607 * |
|
608 * For the normal tail, the method of Marsaglia[5] provides: |
|
609 * generate x = -ln(U_1)/r, y = -ln(U_2), until y+y > x*x, |
|
610 * then return r+x. Except that r+x is always in the positive |
|
611 * tail!!!! Any thing random might be used to determine the |
|
612 * sign, but as we already have r we might as well use it |
|
613 * |
|
614 * [PAK] but not the bottom 8 bits, since they are all 0 here! |
|
615 */ |
|
616 double xx, yy; |
|
617 do |
|
618 { |
|
619 xx = - ZIGGURAT_NOR_INV_R * log (RANDU); |
|
620 yy = - log (RANDU); |
|
621 } |
|
622 while ( yy+yy <= xx*xx); |
|
623 return (rabs&0x100 ? -ZIGGURAT_NOR_R-xx : ZIGGURAT_NOR_R+xx); |
|
624 } |
|
625 else if ((fi[idx-1] - fi[idx]) * RANDU + fi[idx] < exp(-0.5*x*x)) |
|
626 return x; |
|
627 } |
|
628 } |
|
629 |
|
630 double |
|
631 oct_rande (void) |
|
632 { |
|
633 if (initt) |
|
634 create_ziggurat_tables(); |
|
635 |
|
636 while (1) |
|
637 { |
|
638 ZIGINT ri = ERANDI; |
|
639 const int idx = (int)(ri & 0xFF); |
|
640 const double x = ri * we[idx]; |
|
641 if (ri < ke[idx]) |
|
642 return x; // 98.9% of the time we return here 1st try |
|
643 else if (idx == 0) |
|
644 { |
|
645 /* As stated in Marsaglia and Tsang |
|
646 * |
|
647 * For the exponential tail, the method of Marsaglia[5] provides: |
|
648 * x = r - ln(U); |
|
649 */ |
|
650 return ZIGGURAT_EXP_R - log(RANDU); |
|
651 } |
|
652 else if ((fe[idx-1] - fe[idx]) * RANDU + fe[idx] < exp(-x)) |
|
653 return x; |
|
654 } |
|
655 } |
|
656 |
|
657 /* Array generators */ |
|
658 void |
|
659 oct_fill_randu (octave_idx_type n, double *p) |
|
660 { |
|
661 octave_idx_type i; |
|
662 for (i = 0; i < n; i++) |
|
663 p[i] = oct_randu(); |
|
664 } |
|
665 |
|
666 void |
|
667 oct_fill_randn (octave_idx_type n, double *p) |
|
668 { |
|
669 octave_idx_type i; |
|
670 for (i = 0; i < n; i++) |
|
671 p[i] = oct_randn(); |
|
672 } |
|
673 |
|
674 void |
|
675 oct_fill_rande (octave_idx_type n, double *p) |
|
676 { |
|
677 octave_idx_type i; |
|
678 for (i = 0; i < n; i++) |
|
679 p[i] = oct_rande(); |
|
680 } |
|
681 |
|
682 /* |
|
683 ;;; Local Variables: *** |
|
684 ;;; mode: C *** |
|
685 ;;; End: *** |
|
686 */ |