Mercurial > hg > octave-lyh
annotate liboctave/randmtzig.c @ 7533:ff52243af934
save state separately for each MT random number generator
author | John W. Eaton <jwe@octave.org> |
---|---|
date | Tue, 26 Feb 2008 05:28:59 -0500 |
parents | 2eb392d058bb |
children | eb63fbe60fab |
rev | line source |
---|---|
7019 | 1 /* |
2 | |
3 Copyright (C) 2006, 2007 John W. Eaton | |
4 | |
5 This file is part of Octave. | |
6 | |
7 Octave is free software; you can redistribute it and/or modify it | |
8 under the terms of the GNU General Public License as published by the | |
9 Free Software Foundation; either version 3 of the License, or (at your | |
10 option) any later version. | |
11 | |
12 Octave is distributed in the hope that it will be useful, but WITHOUT | |
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
15 for more details. | |
16 | |
17 You should have received a copy of the GNU General Public License | |
18 along with Octave; see the file COPYING. If not, see | |
19 <http://www.gnu.org/licenses/>. | |
20 | |
21 */ | |
22 | |
5742 | 23 /* |
24 A C-program for MT19937, with initialization improved 2002/2/10. | |
25 Coded by Takuji Nishimura and Makoto Matsumoto. | |
26 This is a faster version by taking Shawn Cokus's optimization, | |
27 Matthew Bellew's simplification, Isaku Wada's real version. | |
28 David Bateman added normal and exponential distributions following | |
29 Marsaglia and Tsang's Ziggurat algorithm. | |
30 | |
31 Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura, | |
32 Copyright (C) 2004, David Bateman | |
33 All rights reserved. | |
34 | |
35 Redistribution and use in source and binary forms, with or without | |
36 modification, are permitted provided that the following conditions | |
37 are met: | |
38 | |
39 1. Redistributions of source code must retain the above copyright | |
40 notice, this list of conditions and the following disclaimer. | |
41 | |
42 2. Redistributions in binary form must reproduce the above copyright | |
43 notice, this list of conditions and the following disclaimer in the | |
44 documentation and/or other materials provided with the distribution. | |
45 | |
46 3. The names of its contributors may not be used to endorse or promote | |
47 products derived from this software without specific prior written | |
48 permission. | |
49 | |
50 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
51 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
52 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
53 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER | |
54 OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
55 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
56 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
57 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | |
58 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | |
59 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
60 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
61 | |
62 | |
63 Any feedback is very welcome. | |
64 http://www.math.keio.ac.jp/matumoto/emt.html | |
65 email: matumoto@math.keio.ac.jp | |
66 | |
67 * 2006-04-01 David Bateman | |
68 * * convert for use in octave, declaring static functions only used | |
69 * here and adding oct_ to functions visible externally | |
70 * * inverse sense of ALLBITS | |
71 * 2004-01-19 Paul Kienzle | |
72 * * comment out main | |
73 * add init_by_entropy, get_state, set_state | |
74 * * converted to allow compiling by C++ compiler | |
75 * | |
76 * 2004-01-25 David Bateman | |
77 * * Add Marsaglia and Tsang Ziggurat code | |
78 * | |
79 * 2004-07-13 Paul Kienzle | |
80 * * make into an independent library with some docs. | |
81 * * introduce new main and test code. | |
82 * | |
83 * 2004-07-28 Paul Kienzle & David Bateman | |
84 * * add -DALLBITS flag for 32 vs. 53 bits of randomness in mantissa | |
85 * * make the naming scheme more uniform | |
86 * * add -DHAVE_X86 for faster support of 53 bit mantissa on x86 arch. | |
87 * | |
88 * 2005-02-23 Paul Kienzle | |
89 * * fix -DHAVE_X86_32 flag and add -DUSE_X86_32=0|1 for explicit control | |
90 */ | |
91 | |
92 /* | |
93 === Build instructions === | |
94 | |
95 Compile with -DHAVE_GETTIMEOFDAY if the gettimeofday function is | |
96 available. This is not necessary if your architecture has | |
97 /dev/urandom defined. | |
98 | |
99 Compile with -DALLBITS to use 32-bit random numbers instead of the default | |
100 53-bit ones. The 53-bit generators are about 50% slower than the 32-bit ones. | |
101 | |
102 Uses implicit -Di386 or explicit -DHAVE_X86_32 to determine if CPU=x86. | |
103 You can force X86 behaviour with -DUSE_X86_32=1, or suppress it with | |
104 -DUSE_X86_32=0. You should also consider -march=i686 or similar for | |
105 extra performance. Check whether -DUSE_X86_32=0 is faster on 64-bit | |
106 x86 architectures. | |
107 | |
108 If you want to replace the Mersenne Twister with another | |
109 generator then redefine randi32 appropriately. | |
110 | |
111 === Usage instructions === | |
112 Before using any of the generators, initialize the state with one of | |
113 oct_init_by_int, oct_init_by_array or oct_init_by_entropy. | |
114 | |
115 All generators share the same state vector. | |
116 | |
117 === Mersenne Twister === | |
118 void oct_init_by_int(uint32_t s) 32-bit initial state | |
119 void oct_init_by_array(uint32_t k[],int m) m*32-bit initial state | |
120 void oct_init_by_entropy(void) random initial state | |
121 void oct_get_state(uint32_t save[MT_N+1]) saves state in array | |
122 void oct_set_state(uint32_t save[MT_N+1]) restores state from array | |
5766 | 123 static uint32_t randmt(void) returns 32-bit unsigned int |
5742 | 124 |
125 === inline generators === | |
5766 | 126 static uint32_t randi32(void) returns 32-bit unsigned int |
127 static uint64_t randi53(void) returns 53-bit unsigned int | |
128 static uint64_t randi54(void) returns 54-bit unsigned int | |
129 static uint64_t randi64(void) returns 64-bit unsigned int | |
130 static double randu32(void) returns 32-bit uniform in (0,1) | |
131 static double randu53(void) returns 53-bit uniform in (0,1) | |
5742 | 132 |
133 double oct_randu(void) returns M-bit uniform in (0,1) | |
134 double oct_randn(void) returns M-bit standard normal | |
135 double oct_rande(void) returns N-bit standard exponential | |
136 | |
137 === Array generators === | |
138 void oct_fill_randi32(octave_idx_type, uint32_t []) | |
139 void oct_fill_randi64(octave_idx_type, uint64_t []) | |
140 void oct_fill_randu(octave_idx_type, double []) | |
141 void oct_fill_randn(octave_idx_type, double []) | |
142 void oct_fill_rande(octave_idx_type, double []) | |
143 | |
144 */ | |
145 | |
146 #if defined (HAVE_CONFIG_H) | |
147 #include <config.h> | |
148 #endif | |
149 | |
150 #include <stdio.h> | |
151 #include <time.h> | |
152 | |
153 #ifdef HAVE_GETTIMEOFDAY | |
154 #include <sys/time.h> | |
155 #endif | |
156 | |
7231 | 157 #include "lo-math.h" |
5742 | 158 #include "randmtzig.h" |
159 | |
5775 | 160 /* Pick a default for USE_X86_32 unless the builder already set it: 1 when i386 or HAVE_X86_32 is defined, 0 otherwise. NOTE(review): the code visible in this file tests HAVE_X86_32, not USE_X86_32 -- confirm which macro is intended. FIXME may want to suppress X86 if sizeof(long)>4 */ |
5742 | 161 #if !defined(USE_X86_32) |
162 # if defined(i386) || defined(HAVE_X86_32) | |
163 # define USE_X86_32 1 | |
164 # else | |
165 # define USE_X86_32 0 | |
166 # endif | |
167 #endif | |
168 | |
169 /* ===== Mersenne Twister 32-bit generator ===== */ | |
170 | |
171 #define MT_M 397 /* word offset used by next_state when mixing the state vector */ | |
172 #define MATRIX_A 0x9908b0dfUL /* constant vector a */ | |
173 #define UMASK 0x80000000UL /* most significant w-r bits */ | |
174 #define LMASK 0x7fffffffUL /* least significant r bits */ | |
175 #define MIXBITS(u,v) ( ((u) & UMASK) | ((v) & LMASK) ) /* top bit of u joined with the low 31 bits of v */ | |
176 #define TWIST(u,v) ((MIXBITS(u,v) >> 1) ^ ((v)&1UL ? MATRIX_A : 0UL)) /* evaluates v more than once: pass side-effect-free arguments */ | |
177 | |
178 static uint32_t *next; /* next untempered word that randmt will hand out from state[] */ | |
179 static uint32_t state[MT_N]; /* the array for the state vector */ | |
180 static int left = 1; /* randmt decrements this and calls next_state when it reaches 0; 1 forces a refill on the first draw */ | |
181 static int initf = 0; /* set to 1 by the seeding routines; next_state seeds from entropy while it is 0 */ | |
182 static int initt = 1; /* nonzero until create_ziggurat_tables has filled the ziggurat tables */ | |
183 | |
184 /* initializes state[MT_N] with a seed */ | |
185 void | |
186 oct_init_by_int (uint32_t s) | |
187 { | |
188 int j; | |
189 state[0] = s & 0xffffffffUL; | |
190 for (j = 1; j < MT_N; j++) { | |
191 state[j] = (1812433253UL * (state[j-1] ^ (state[j-1] >> 30)) + j); | |
192 /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */ | |
193 /* In the previous versions, MSBs of the seed affect */ | |
194 /* only MSBs of the array state[]. */ | |
195 /* 2002/01/09 modified by Makoto Matsumoto */ | |
196 state[j] &= 0xffffffffUL; /* for >32 bit machines */ | |
197 } | |
198 left = 1; | |
199 initf = 1; | |
200 } | |
201 | |
202 /* initialize by an array with array-length */ | |
203 /* init_key is the array for initializing keys */ | |
204 /* key_length is its length */ | |
205 void | |
7533
ff52243af934
save state separately for each MT random number generator
John W. Eaton <jwe@octave.org>
parents:
7231
diff
changeset
|
206 oct_init_by_array (uint32_t *init_key, int key_length) |
5742 | 207 { |
208 int i, j, k; | |
209 oct_init_by_int (19650218UL); | |
210 i = 1; | |
211 j = 0; | |
212 k = (MT_N > key_length ? MT_N : key_length); | |
213 for (; k; k--) | |
214 { | |
215 state[i] = (state[i] ^ ((state[i-1] ^ (state[i-1] >> 30)) * 1664525UL)) | |
216 + init_key[j] + j; /* non linear */ | |
217 state[i] &= 0xffffffffUL; /* for WORDSIZE > 32 machines */ | |
218 i++; | |
219 j++; | |
220 if (i >= MT_N) | |
221 { | |
222 state[0] = state[MT_N-1]; | |
223 i = 1; | |
224 } | |
225 if (j >= key_length) | |
226 j = 0; | |
227 } | |
228 for (k = MT_N - 1; k; k--) | |
229 { | |
230 state[i] = (state[i] ^ ((state[i-1] ^ (state[i-1] >> 30)) * 1566083941UL)) | |
231 - i; /* non linear */ | |
232 state[i] &= 0xffffffffUL; /* for WORDSIZE > 32 machines */ | |
233 i++; | |
234 if (i >= MT_N) | |
235 { | |
236 state[0] = state[MT_N-1]; | |
237 i = 1; | |
238 } | |
239 } | |
240 | |
241 state[0] = 0x80000000UL; /* MSB is 1; assuring non-zero initial array */ | |
242 left = 1; | |
243 initf = 1; | |
244 } | |
245 | |
246 void | |
247 oct_init_by_entropy (void) | |
248 { | |
249 uint32_t entropy[MT_N]; | |
250 int n = 0; | |
251 | |
252 /* Look for entropy in /dev/urandom */ | |
253 FILE* urandom =fopen("/dev/urandom", "rb"); | |
254 if (urandom) | |
255 { | |
256 while (n < MT_N) | |
257 { | |
258 unsigned char word[4]; | |
259 if (fread(word, 4, 1, urandom) != 1) | |
260 break; | |
261 entropy[n++] = word[0]+(word[1]<<8)+(word[2]<<16)+(word[3]<<24); | |
262 } | |
263 fclose(urandom); | |
264 } | |
265 | |
266 /* If there isn't enough entropy, gather some from various sources */ | |
267 if (n < MT_N) | |
268 entropy[n++] = time(NULL); /* Current time in seconds */ | |
269 if (n < MT_N) | |
270 entropy[n++] = clock(); /* CPU time used (usec) */ | |
271 #ifdef HAVE_GETTIMEOFDAY | |
272 if (n < MT_N) | |
273 { | |
274 struct timeval tv; | |
275 if (gettimeofday(&tv, NULL) != -1) | |
276 entropy[n++] = tv.tv_usec; /* Fractional part of current time */ | |
277 } | |
278 #endif | |
279 /* Send all the entropy into the initial state vector */ | |
280 oct_init_by_array(entropy,n); | |
281 } | |
282 | |
283 void | |
7533
ff52243af934
save state separately for each MT random number generator
John W. Eaton <jwe@octave.org>
parents:
7231
diff
changeset
|
284 oct_set_state (uint32_t *save) |
5742 | 285 { |
286 int i; | |
7533
ff52243af934
save state separately for each MT random number generator
John W. Eaton <jwe@octave.org>
parents:
7231
diff
changeset
|
287 for (i = 0; i < MT_N; i++) |
5742 | 288 state[i] = save[i]; |
289 left = save[MT_N]; | |
290 next = state + (MT_N - left + 1); | |
291 } | |
292 | |
293 void | |
7533
ff52243af934
save state separately for each MT random number generator
John W. Eaton <jwe@octave.org>
parents:
7231
diff
changeset
|
294 oct_get_state (uint32_t *save) |
5742 | 295 { |
296 int i; | |
297 for (i = 0; i < MT_N; i++) | |
298 save[i] = state[i]; | |
299 save[MT_N] = left; | |
300 } | |
301 | |
302 static void | |
303 next_state (void) | |
304 { | |
305 uint32_t *p = state; | |
306 int j; | |
307 | |
308 /* if init_by_int() has not been called, */ | |
309 /* a default initial seed is used */ | |
310 /* if (initf==0) init_by_int(5489UL); */ | |
311 /* Or better yet, a random seed! */ | |
312 if (initf == 0) | |
313 oct_init_by_entropy(); | |
314 | |
315 left = MT_N; | |
316 next = state; | |
317 | |
318 for (j = MT_N - MT_M + 1; --j; p++) | |
319 *p = p[MT_M] ^ TWIST(p[0], p[1]); | |
320 | |
321 for (j = MT_M; --j; p++) | |
322 *p = p[MT_M-MT_N] ^ TWIST(p[0], p[1]); | |
323 | |
324 *p = p[MT_M-MT_N] ^ TWIST(p[0], state[0]); | |
325 } | |
326 | |
327 /* generates a random number on [0,0xffffffff]-interval */ | |
5766 | 328 static uint32_t |
5742 | 329 randmt (void) |
330 { | |
331 register uint32_t y; | |
332 | |
333 if (--left == 0) | |
334 next_state(); | |
335 y = *next++; | |
336 | |
337 /* Tempering */ | |
338 y ^= (y >> 11); | |
339 y ^= (y << 7) & 0x9d2c5680UL; | |
340 y ^= (y << 15) & 0xefc60000UL; | |
341 return (y ^ (y >> 18)); | |
342 } | |
343 | |
344 /* ===== Uniform generators ===== */ | |
345 | |
346 /* Select which 32 bit generator to use: randi32 is an alias for randmt, so every generator below draws its 32-bit words from the Mersenne Twister. Redefine it to use another generator. */ | |
347 #define randi32 randmt | |
348 | |
/* Return a 53-bit unsigned random integer.

   Two 32-bit words are drawn: the first supplies the low 32 bits and
   the low 21 bits of the second supply bits 32..52.

   The value is assembled with a shift rather than by writing through a
   uint32_t pointer into a uint64_t.  That avoids a strict-aliasing
   violation and yields the same value the pointer form produced on
   little-endian x86.  */
static uint64_t
randi53 (void)
{
  const uint32_t lo = randi32 ();
  const uint32_t hi = randi32 () & 0x1FFFFF;

  return (((uint64_t) hi << 32) | lo);
}
364 | |
/* Return a 54-bit unsigned random integer.

   Two 32-bit words are drawn: the first supplies the low 32 bits and
   the low 22 bits of the second supply bits 32..53.

   The value is assembled with a shift rather than by writing through a
   uint32_t pointer into a uint64_t.  That avoids a strict-aliasing
   violation and yields the same value the pointer form produced on
   little-endian x86.  */
static uint64_t
randi54 (void)
{
  const uint32_t lo = randi32 ();
  const uint32_t hi = randi32 () & 0x3FFFFF;

  return (((uint64_t) hi << 32) | lo);
}
380 | |
#if 0
/* FIXME -- this doesn't seem to be used anywhere; should it be removed? */

/* Return a 64-bit unsigned random integer built from two 32-bit words:
   the first supplies the low half, the second the high half.  The value
   is assembled with a shift instead of a uint32_t pointer into a
   uint64_t, which avoids a strict-aliasing violation.  */
static uint64_t
randi64 (void)
{
  const uint32_t lo = randi32 ();
  const uint32_t hi = randi32 ();

  return (((uint64_t) hi << 32) | lo);
}
#endif
5742 | 399 |
#ifdef ALLBITS
/* Return a random number on the open interval (0,1) built from one
   32-bit word: the word is offset by one half and divided by 2^32.  */
static double
randu32 (void)
{
  const double word = (double) randi32 ();

  return (word + 0.5) * (1.0/4294967296.0);
}
#else
/* Return a random number on (0,1) with 53-bit resolution.  The top 27
   bits of one word and the top 26 bits of a second word are combined,
   offset by 0.4 and divided by 2^53.  */
static double
randu53 (void)
{
  const uint32_t hi27 = randi32 () >> 5;
  const uint32_t lo26 = randi32 () >> 6;

  return (hi27*67108864.0 + lo26 + 0.4) * (1.0/9007199254740992.0);
}
#endif
5742 | 418 |
/* Return a uniform random number on (0,1).  The mantissa width is fixed
   at compile time: 32 bits when ALLBITS is defined, 53 bits otherwise.  */
double
oct_randu (void)
{
  double u;

#ifdef ALLBITS
  u = randu32 ();
#else
  u = randu53 ();
#endif

  return u;
}
429 | |
430 /* ===== Ziggurat normal and exponential generators ===== */ | |
431 #ifdef ALLBITS | |
432 # define ZIGINT uint32_t /* integer type of the k tables and of the exponential draw */ | |
433 # define EMANTISSA 4294967296.0 /* 32 bit mantissa */ | |
434 # define ERANDI randi32() /* 32 bits for mantissa */ | |
435 # define NMANTISSA 2147483648.0 /* 31 bit mantissa */ | |
436 # define NRANDI randi32() /* 31 bits for mantissa + 1 bit sign */ | |
437 # define RANDU randu32() /* uniform deviate used by the tail and wedge tests */ | |
438 #else | |
439 # define ZIGINT uint64_t /* integer type of the k tables and of the exponential draw */ | |
440 # define EMANTISSA 9007199254740992.0 /* 53 bit mantissa */ | |
441 # define ERANDI randi53() /* 53 bits for mantissa */ | |
442 # define NMANTISSA EMANTISSA | |
443 # define NRANDI randi54() /* 53 bits for mantissa + 1 bit sign */ | |
444 # define RANDU randu53() /* uniform deviate used by the tail and wedge tests */ | |
445 #endif | |
446 | |
447 #define ZIGGURAT_TABLE_SIZE 256 /* number of entries in each table below */ | |
448 | |
449 #define ZIGGURAT_NOR_R 3.6541528853610088 /* r for the normal tables: x of the outermost strip and start of the tail */ | |
450 #define ZIGGURAT_NOR_INV_R 0.27366123732975828 /* multiplies -log(U) in the normal tail sampler */ | |
451 #define NOR_SECTION_AREA 0.00492867323399 /* v: area of each strip of the normal ziggurat */ | |
452 | |
453 #define ZIGGURAT_EXP_R 7.69711747013104972 /* r for the exponential tables: x of the outermost strip and start of the tail */ | |
454 #define ZIGGURAT_EXP_INV_R 0.129918765548341586 /* not referenced by the code visible in this file */ | |
455 #define EXP_SECTION_AREA 0.0039496598225815571993 /* v: area of each strip of the exponential ziggurat */ | |
456 | |
457 static ZIGINT ki[ZIGGURAT_TABLE_SIZE]; /* normal: integer acceptance thresholds k_i */ | |
458 static double wi[ZIGGURAT_TABLE_SIZE], fi[ZIGGURAT_TABLE_SIZE]; /* normal: scale factors w_i and density values f_i */ | |
459 static ZIGINT ke[ZIGGURAT_TABLE_SIZE]; /* exponential: integer acceptance thresholds k_i */ | |
460 static double we[ZIGGURAT_TABLE_SIZE], fe[ZIGGURAT_TABLE_SIZE]; /* exponential: scale factors w_i and density values f_i */ | |
461 | |
462 /* | |
463 This code is based on the paper Marsaglia and Tsang, "The ziggurat method | |
464 for generating random variables", Journ. Statistical Software. Code was | |
465 presented in this paper for a Ziggurat of 127 levels and using a 32 bit | |
466 integer random number generator. This version of the code, uses the | |
467 Mersenne Twister as the integer generator and uses 256 levels in the | |
468 Ziggurat. This has several advantages. | |
469 | |
470 1) As Marsaglia and Tsang themselves state, the more levels, the fewer | |
471 times the expensive tail algorithm must be called. | |
472 2) The cycle time of the generator is determined by the integer | |
473 generator, thus the use of a Mersenne Twister for the core random | |
474 generator makes this cycle extremely long. | |
475 3) The license on the original code was unclear, thus rewriting the code | |
476 from the article means we are free of copyright issues. | |
477 4) Compile flag for full 53-bit random mantissa. | |
478 | |
479 It should be stated that the authors made my life easier, by the fact that | |
480 the algorithm developed in the text of the article is for a 256 level | |
481 ziggurat, even if the code itself isn't... | |
482 | |
483 One modification to the algorithm developed in the article, is that it is | |
484 assumed that 0 <= x < Inf, and "unsigned long"s are used, thus resulting in | |
485 terms like 2^32 in the code. As the normal distribution is defined between | |
486 -Inf < x < Inf, we effectively only have 31 bit integers plus a sign. Thus | |
487 in Marsaglia and Tsang, terms like 2^32 become 2^31. We use NMANTISSA for | |
488 this term. The exponential distribution is one sided so we use the | |
489 full 32 bits. We use EMANTISSA for this term. | |
490 | |
491 It appears that I'm slightly slower than the code in the article, this | |
492 is partially due to a better generator of random integers than they | |
493 use. But might also be that the case of rapid return was optimized by | |
494 inlining the relevant code with a #define. As the basic Mersenne | |
495 Twister is only 25% faster than this code I suspect that the main | |
496 reason is just the use of the Mersenne Twister and not the inlining, | |
497 so I'm not going to try and optimize further. | |
498 */ | |
499 | |
500 static void | |
501 create_ziggurat_tables (void) | |
502 { | |
503 int i; | |
504 double x, x1; | |
505 | |
506 /* Ziggurat tables for the normal distribution */ | |
507 x1 = ZIGGURAT_NOR_R; | |
508 wi[255] = x1 / NMANTISSA; | |
509 fi[255] = exp (-0.5 * x1 * x1); | |
510 | |
511 /* Index zero is special for tail strip, where Marsaglia and Tsang | |
512 * defines this as | |
513 * k_0 = 2^31 * r * f(r) / v, w_0 = 0.5^31 * v / f(r), f_0 = 1, | |
514 * where v is the area of each strip of the ziggurat. | |
515 */ | |
516 ki[0] = (ZIGINT) (x1 * fi[255] / NOR_SECTION_AREA * NMANTISSA); | |
517 wi[0] = NOR_SECTION_AREA / fi[255] / NMANTISSA; | |
518 fi[0] = 1.; | |
519 | |
520 for (i = 254; i > 0; i--) | |
521 { | |
522 /* New x is given by x = f^{-1}(v/x_{i+1} + f(x_{i+1})), thus | |
523 * need inverse operator of y = exp(-0.5*x*x) -> x = sqrt(-2*ln(y)) | |
524 */ | |
525 x = sqrt(-2. * log(NOR_SECTION_AREA / x1 + fi[i+1])); | |
526 ki[i+1] = (ZIGINT)(x / x1 * NMANTISSA); | |
527 wi[i] = x / NMANTISSA; | |
528 fi[i] = exp (-0.5 * x * x); | |
529 x1 = x; | |
530 } | |
531 | |
532 ki[1] = 0; | |
533 | |
534 /* Zigurrat tables for the exponential distribution */ | |
535 x1 = ZIGGURAT_EXP_R; | |
536 we[255] = x1 / EMANTISSA; | |
537 fe[255] = exp (-x1); | |
538 | |
539 /* Index zero is special for tail strip, where Marsaglia and Tsang | |
540 * defines this as | |
541 * k_0 = 2^32 * r * f(r) / v, w_0 = 0.5^32 * v / f(r), f_0 = 1, | |
542 * where v is the area of each strip of the ziggurat. | |
543 */ | |
544 ke[0] = (ZIGINT) (x1 * fe[255] / EXP_SECTION_AREA * EMANTISSA); | |
545 we[0] = EXP_SECTION_AREA / fe[255] / EMANTISSA; | |
546 fe[0] = 1.; | |
547 | |
548 for (i = 254; i > 0; i--) | |
549 { | |
550 /* New x is given by x = f^{-1}(v/x_{i+1} + f(x_{i+1})), thus | |
551 * need inverse operator of y = exp(-x) -> x = -ln(y) | |
552 */ | |
553 x = - log(EXP_SECTION_AREA / x1 + fe[i+1]); | |
554 ke[i+1] = (ZIGINT)(x / x1 * EMANTISSA); | |
555 we[i] = x / EMANTISSA; | |
556 fe[i] = exp (-x); | |
557 x1 = x; | |
558 } | |
559 ke[1] = 0; | |
560 | |
561 initt = 0; | |
562 } | |
563 | |
564 /* | |
565 * Here is the guts of the algorithm. As Marsaglia and Tsang state the | |
566 * algorithm in their paper | |
567 * | |
568 * 1) Calculate a random signed integer j and let i be the index | |
569 * provided by the rightmost 8-bits of j | |
570 * 2) Set x = j * w_i. If j < k_i return x | |
571 * 3) If i = 0, then return x from the tail | |
572 * 4) If [f(x_{i-1}) - f(x_i)] * U < f(x) - f(x_i), return x | |
573 * 5) goto step 1 | |
574 * | |
575 * Where f is the functional form of the distribution, which for a normal | |
576 * distribution is exp(-0.5*x*x) | |
577 */ | |
578 | |
579 double | |
580 oct_randn (void) | |
581 { | |
582 if (initt) | |
583 create_ziggurat_tables(); | |
584 | |
585 while (1) | |
586 { | |
587 /* The following code is specialized for 32-bit mantissa. | |
588 * Compared to the arbitrary mantissa code, there is a performance | |
589 * gain for 32-bits: PPC: 2%, MIPS: 8%, x86: 40% | |
590 * There is a bigger performance gain compared to using a full | |
591 * 53-bit mantissa: PPC: 60%, MIPS: 65%, x86: 240% | |
592 * Of course, different compilers and operating systems may | |
593 * have something to do with this. | |
594 */ | |
595 #if !defined(ALLBITS) | |
596 # if HAVE_X86_32 | |
597 /* 53-bit mantissa, 1-bit sign, x86 32-bit architecture */ | |
598 double x; | |
599 int si,idx; | |
600 register uint32_t lo, hi; | |
601 int64_t rabs; | |
602 uint32_t *p = (uint32_t *)&rabs; | |
603 lo = randi32(); | |
604 idx = lo&0xFF; | |
605 hi = randi32(); | |
606 si = hi&UMASK; | |
607 p[0] = lo; | |
608 p[1] = hi&0x1FFFFF; | |
609 x = ( si ? -rabs : rabs ) * wi[idx]; | |
610 # else /* !HAVE_X86_32 */ | |
611 /* arbitrary mantissa (selected by NRANDI, with 1 bit for sign) */ | |
612 const uint64_t r = NRANDI; | |
613 const int64_t rabs=r>>1; | |
614 const int idx = (int)(rabs&0xFF); | |
615 const double x = ( r&1 ? -rabs : rabs) * wi[idx]; | |
616 # endif /* !HAVE_X86_32 */ | |
617 if (rabs < (int64_t)ki[idx]) | |
618 #else /* ALLBITS */ | |
619 /* 32-bit mantissa */ | |
620 const uint32_t r = randi32(); | |
621 const uint32_t rabs = r&LMASK; | |
622 const int idx = (int)(r&0xFF); | |
623 const double x = ((int32_t)r) * wi[idx]; | |
624 if (rabs < ki[idx]) | |
625 #endif /* ALLBITS */ | |
626 return x; /* 99.3% of the time we return here 1st try */ | |
627 else if (idx == 0) | |
628 { | |
629 /* As stated in Marsaglia and Tsang | |
630 * | |
631 * For the normal tail, the method of Marsaglia[5] provides: | |
632 * generate x = -ln(U_1)/r, y = -ln(U_2), until y+y > x*x, | |
633 * then return r+x. Except that r+x is always in the positive | |
634 * tail!!!! Any thing random might be used to determine the | |
635 * sign, but as we already have r we might as well use it | |
636 * | |
637 * [PAK] but not the bottom 8 bits, since they are all 0 here! | |
638 */ | |
639 double xx, yy; | |
640 do | |
641 { | |
642 xx = - ZIGGURAT_NOR_INV_R * log (RANDU); | |
643 yy = - log (RANDU); | |
644 } | |
645 while ( yy+yy <= xx*xx); | |
646 return (rabs&0x100 ? -ZIGGURAT_NOR_R-xx : ZIGGURAT_NOR_R+xx); | |
647 } | |
648 else if ((fi[idx-1] - fi[idx]) * RANDU + fi[idx] < exp(-0.5*x*x)) | |
649 return x; | |
650 } | |
651 } | |
652 | |
653 double | |
654 oct_rande (void) | |
655 { | |
656 if (initt) | |
657 create_ziggurat_tables(); | |
658 | |
659 while (1) | |
660 { | |
661 ZIGINT ri = ERANDI; | |
662 const int idx = (int)(ri & 0xFF); | |
663 const double x = ri * we[idx]; | |
664 if (ri < ke[idx]) | |
665 return x; // 98.9% of the time we return here 1st try | |
666 else if (idx == 0) | |
667 { | |
668 /* As stated in Marsaglia and Tsang | |
669 * | |
670 * For the exponential tail, the method of Marsaglia[5] provides: | |
671 * x = r - ln(U); | |
672 */ | |
673 return ZIGGURAT_EXP_R - log(RANDU); | |
674 } | |
675 else if ((fe[idx-1] - fe[idx]) * RANDU + fe[idx] < exp(-x)) | |
676 return x; | |
677 } | |
678 } | |
679 | |
680 /* Array generators */ | |
681 void | |
682 oct_fill_randu (octave_idx_type n, double *p) | |
683 { | |
684 octave_idx_type i; | |
685 for (i = 0; i < n; i++) | |
686 p[i] = oct_randu(); | |
687 } | |
688 | |
689 void | |
690 oct_fill_randn (octave_idx_type n, double *p) | |
691 { | |
692 octave_idx_type i; | |
693 for (i = 0; i < n; i++) | |
694 p[i] = oct_randn(); | |
695 } | |
696 | |
697 void | |
698 oct_fill_rande (octave_idx_type n, double *p) | |
699 { | |
700 octave_idx_type i; | |
701 for (i = 0; i < n; i++) | |
702 p[i] = oct_rande(); | |
703 } | |
704 | |
705 /* | |
706 ;;; Local Variables: *** | |
707 ;;; mode: C *** | |
708 ;;; End: *** | |
709 */ |