Mercurial > hg > octave-nkf
comparison liboctave/oct-sort.cc @ 4851:047ff938b0d9
[project @ 2004-04-06 17:12:14 by jwe]
author | jwe |
---|---|
date | Tue, 06 Apr 2004 17:14:17 +0000 |
parents | |
children | 7514d69b422a |
comparison
equal
deleted
inserted
replaced
4850:8cc4818a0de0 | 4851:047ff938b0d9 |
---|---|
1 /* | |
2 Copyright (C) 2003 David Bateman | |
3 | |
4 This file is part of Octave. | |
5 | |
6 Octave is free software; you can redistribute it and/or modify it | |
7 under the terms of the GNU General Public License as published by the | |
8 Free Software Foundation; either version 2, or (at your option) any | |
9 later version. | |
10 | |
11 Octave is distributed in the hope that it will be useful, but WITHOUT | |
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
14 for more details. | |
15 | |
16 You should have received a copy of the GNU General Public License | |
17 along with Octave; see the file COPYING. If not, write to the Free | |
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | |
19 | |
20 Code stolen in large part from Python's, listobject.c, which itself had | |
21 no license header. However, thanks to Tim Peters for the parts of the | |
22 code I ripped-off. | |
23 | |
24 As required in the Python license the short description of the changes | |
25 made are | |
26 | |
27 * convert the sorting code in listobject.cc into a generic class, | |
28 replacing PyObject* with the type of the class T. | |
29 | |
30 The Python license is | |
31 | |
32 PSF LICENSE AGREEMENT FOR PYTHON 2.3 | |
33 -------------------------------------- | |
34 | |
35 1. This LICENSE AGREEMENT is between the Python Software Foundation | |
36 ("PSF"), and the Individual or Organization ("Licensee") accessing and | |
37 otherwise using Python 2.3 software in source or binary form and its | |
38 associated documentation. | |
39 | |
40 2. Subject to the terms and conditions of this License Agreement, PSF | |
41 hereby grants Licensee a nonexclusive, royalty-free, world-wide | |
42 license to reproduce, analyze, test, perform and/or display publicly, | |
43 prepare derivative works, distribute, and otherwise use Python 2.3 | |
44 alone or in any derivative version, provided, however, that PSF's | |
45 License Agreement and PSF's notice of copyright, i.e., "Copyright (c) | |
46 2001, 2002, 2003 Python Software Foundation; All Rights Reserved" are | |
47 retained in Python 2.3 alone or in any derivative version prepared by | |
48 Licensee. | |
49 | |
50 3. In the event Licensee prepares a derivative work that is based on | |
51 or incorporates Python 2.3 or any part thereof, and wants to make | |
52 the derivative work available to others as provided herein, then | |
53 Licensee hereby agrees to include in any such work a brief summary of | |
54 the changes made to Python 2.3. | |
55 | |
56 4. PSF is making Python 2.3 available to Licensee on an "AS IS" | |
57 basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR | |
58 IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND | |
59 DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS | |
60 FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 2.3 WILL NOT | |
61 INFRINGE ANY THIRD PARTY RIGHTS. | |
62 | |
63 5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON | |
64 2.3 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS | |
65 A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 2.3, | |
66 OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. | |
67 | |
68 6. This License Agreement will automatically terminate upon a material | |
69 breach of its terms and conditions. | |
70 | |
71 7. Nothing in this License Agreement shall be deemed to create any | |
72 relationship of agency, partnership, or joint venture between PSF and | |
73 Licensee. This License Agreement does not grant permission to use PSF | |
74 trademarks or trade name in a trademark sense to endorse or promote | |
75 products or services of Licensee, or any third party. | |
76 | |
77 8. By copying, installing or otherwise using Python 2.3, Licensee | |
78 agrees to be bound by the terms and conditions of this License | |
79 Agreement. | |
80 */ | |
81 | |
82 #ifdef HAVE_CONFIG_H | |
83 #include <config.h> | |
84 #endif | |
85 | |
86 #include "oct-obj.h" | |
87 #include "lo-mappers.h" | |
88 #include "quit.h" | |
89 #include "oct-sort.h" | |
90 | |
91 #define IFLT(a,b) if (compare == NULL ? ((a) < (b)) : compare ((a), (b))) | |
92 | |
93 template <class T> | |
94 octave_sort<T>::octave_sort (void) : compare (NULL) | |
95 { | |
96 merge_init ( ); | |
97 merge_getmem (1024); | |
98 } | |
99 | |
100 template <class T> | |
101 octave_sort<T>::octave_sort (bool (*comp) (T, T)) : compare (comp) | |
102 { | |
103 merge_init ( ); | |
104 merge_getmem (1024); | |
105 } | |
106 | |
107 /* Reverse a slice of a list in place, from lo up to (exclusive) hi. */ | |
108 template <class T> | |
109 void | |
110 octave_sort<T>::reverse_slice(T *lo, T *hi) | |
111 { | |
112 --hi; | |
113 while (lo < hi) | |
114 { | |
115 T t = *lo; | |
116 *lo = *hi; | |
117 *hi = t; | |
118 ++lo; | |
119 --hi; | |
120 } | |
121 } | |
122 | |
123 template <class T> | |
124 void | |
125 octave_sort<T>::binarysort (T *lo, T *hi, T *start) | |
126 { | |
127 register T *l, *p, *r; | |
128 register T pivot; | |
129 | |
130 if (lo == start) | |
131 ++start; | |
132 | |
133 for (; start < hi; ++start) | |
134 { | |
135 /* set l to where *start belongs */ | |
136 l = lo; | |
137 r = start; | |
138 pivot = *r; | |
139 /* Invariants: | |
140 * pivot >= all in [lo, l). | |
141 * pivot < all in [r, start). | |
142 * The second is vacuously true at the start. | |
143 */ | |
144 do | |
145 { | |
146 p = l + ((r - l) >> 1); | |
147 IFLT (pivot, *p) | |
148 r = p; | |
149 else | |
150 l = p+1; | |
151 } | |
152 while (l < r); | |
153 /* The invariants still hold, so pivot >= all in [lo, l) and | |
154 pivot < all in [l, start), so pivot belongs at l. Note | |
155 that if there are elements equal to pivot, l points to the | |
156 first slot after them -- that's why this sort is stable. | |
157 Slide over to make room. | |
158 Caution: using memmove is much slower under MSVC 5; | |
159 we're not usually moving many slots. */ | |
160 for (p = start; p > l; --p) | |
161 *p = *(p-1); | |
162 *l = pivot; | |
163 } | |
164 | |
165 return; | |
166 } | |
167 | |
168 /* | |
169 Return the length of the run beginning at lo, in the slice [lo, hi). lo < hi | |
170 is required on entry. "A run" is the longest ascending sequence, with | |
171 | |
172 lo[0] <= lo[1] <= lo[2] <= ... | |
173 | |
174 or the longest descending sequence, with | |
175 | |
176 lo[0] > lo[1] > lo[2] > ... | |
177 | |
178 Boolean *descending is set to 0 in the former case, or to 1 in the latter. | |
179 For its intended use in a stable mergesort, the strictness of the defn of | |
180 "descending" is needed so that the caller can safely reverse a descending | |
181 sequence without violating stability (strict > ensures there are no equal | |
182 elements to get out of order). | |
183 | |
184 Returns -1 in case of error. | |
185 */ | |
186 template <class T> | |
187 int | |
188 octave_sort<T>::count_run(T *lo, T *hi, int *descending) | |
189 { | |
190 int n; | |
191 | |
192 *descending = 0; | |
193 ++lo; | |
194 if (lo == hi) | |
195 return 1; | |
196 | |
197 n = 2; | |
198 | |
199 IFLT (*lo, *(lo-1)) | |
200 { | |
201 *descending = 1; | |
202 for (lo = lo+1; lo < hi; ++lo, ++n) | |
203 { | |
204 IFLT (*lo, *(lo-1)) | |
205 ; | |
206 else | |
207 break; | |
208 } | |
209 } | |
210 else | |
211 { | |
212 for (lo = lo+1; lo < hi; ++lo, ++n) | |
213 { | |
214 IFLT (*lo, *(lo-1)) | |
215 break; | |
216 } | |
217 } | |
218 | |
219 return n; | |
220 } | |
221 | |
222 /* | |
223 Locate the proper position of key in a sorted vector; if the vector contains | |
224 an element equal to key, return the position immediately to the left of | |
225 the leftmost equal element. [gallop_right() does the same except returns | |
226 the position to the right of the rightmost equal element (if any).] | |
227 | |
228 "a" is a sorted vector with n elements, starting at a[0]. n must be > 0. | |
229 | |
230 "hint" is an index at which to begin the search, 0 <= hint < n. The closer | |
231 hint is to the final result, the faster this runs. | |
232 | |
233 The return value is the int k in 0..n such that | |
234 | |
235 a[k-1] < key <= a[k] | |
236 | |
237 pretending that *(a-1) is minus infinity and a[n] is plus infinity. IOW, | |
238 key belongs at index k; or, IOW, the first k elements of a should precede | |
239 key, and the last n-k should follow key. | |
240 | |
241 Returns -1 on error. See listsort.txt for info on the method. | |
242 */ | |
243 template <class T> | |
244 int | |
245 octave_sort<T>::gallop_left(T key, T *a, int n, int hint) | |
246 { | |
247 int ofs; | |
248 int lastofs; | |
249 int k; | |
250 | |
251 a += hint; | |
252 lastofs = 0; | |
253 ofs = 1; | |
254 IFLT (*a, key) | |
255 { | |
256 /* a[hint] < key -- gallop right, until | |
257 * a[hint + lastofs] < key <= a[hint + ofs] | |
258 */ | |
259 const int maxofs = n - hint; /* &a[n-1] is highest */ | |
260 while (ofs < maxofs) | |
261 { | |
262 IFLT (a[ofs], key) | |
263 { | |
264 lastofs = ofs; | |
265 ofs = (ofs << 1) + 1; | |
266 if (ofs <= 0) /* int overflow */ | |
267 ofs = maxofs; | |
268 } | |
269 else /* key <= a[hint + ofs] */ | |
270 break; | |
271 } | |
272 if (ofs > maxofs) | |
273 ofs = maxofs; | |
274 /* Translate back to offsets relative to &a[0]. */ | |
275 lastofs += hint; | |
276 ofs += hint; | |
277 } | |
278 else | |
279 { | |
280 /* key <= a[hint] -- gallop left, until | |
281 * a[hint - ofs] < key <= a[hint - lastofs] | |
282 */ | |
283 const int maxofs = hint + 1; /* &a[0] is lowest */ | |
284 while (ofs < maxofs) | |
285 { | |
286 IFLT (*(a-ofs), key) | |
287 break; | |
288 /* key <= a[hint - ofs] */ | |
289 lastofs = ofs; | |
290 ofs = (ofs << 1) + 1; | |
291 if (ofs <= 0) /* int overflow */ | |
292 ofs = maxofs; | |
293 } | |
294 if (ofs > maxofs) | |
295 ofs = maxofs; | |
296 /* Translate back to positive offsets relative to &a[0]. */ | |
297 k = lastofs; | |
298 lastofs = hint - ofs; | |
299 ofs = hint - k; | |
300 } | |
301 a -= hint; | |
302 | |
303 /* Now a[lastofs] < key <= a[ofs], so key belongs somewhere to the | |
304 * right of lastofs but no farther right than ofs. Do a binary | |
305 * search, with invariant a[lastofs-1] < key <= a[ofs]. | |
306 */ | |
307 ++lastofs; | |
308 while (lastofs < ofs) | |
309 { | |
310 int m = lastofs + ((ofs - lastofs) >> 1); | |
311 | |
312 IFLT (a[m], key) | |
313 lastofs = m+1; /* a[m] < key */ | |
314 else | |
315 ofs = m; /* key <= a[m] */ | |
316 } | |
317 return ofs; | |
318 } | |
319 | |
320 /* | |
321 Exactly like gallop_left(), except that if key already exists in a[0:n], | |
322 finds the position immediately to the right of the rightmost equal value. | |
323 | |
324 The return value is the int k in 0..n such that | |
325 | |
326 a[k-1] <= key < a[k] | |
327 | |
328 or -1 if error. | |
329 | |
330 The code duplication is massive, but this is enough different given that | |
331 we're sticking to "<" comparisons that it's much harder to follow if | |
332 written as one routine with yet another "left or right?" flag. | |
333 */ | |
334 template <class T> | |
335 int | |
336 octave_sort<T>::gallop_right(T key, T *a, int n, int hint) | |
337 { | |
338 int ofs; | |
339 int lastofs; | |
340 int k; | |
341 | |
342 a += hint; | |
343 lastofs = 0; | |
344 ofs = 1; | |
345 IFLT (key, *a) | |
346 { | |
347 /* key < a[hint] -- gallop left, until | |
348 * a[hint - ofs] <= key < a[hint - lastofs] | |
349 */ | |
350 const int maxofs = hint + 1; /* &a[0] is lowest */ | |
351 while (ofs < maxofs) | |
352 { | |
353 IFLT (key, *(a-ofs)) | |
354 { | |
355 lastofs = ofs; | |
356 ofs = (ofs << 1) + 1; | |
357 if (ofs <= 0) /* int overflow */ | |
358 ofs = maxofs; | |
359 } | |
360 else /* a[hint - ofs] <= key */ | |
361 break; | |
362 } | |
363 if (ofs > maxofs) | |
364 ofs = maxofs; | |
365 /* Translate back to positive offsets relative to &a[0]. */ | |
366 k = lastofs; | |
367 lastofs = hint - ofs; | |
368 ofs = hint - k; | |
369 } | |
370 else | |
371 { | |
372 /* a[hint] <= key -- gallop right, until | |
373 * a[hint + lastofs] <= key < a[hint + ofs] | |
374 */ | |
375 const int maxofs = n - hint; /* &a[n-1] is highest */ | |
376 while (ofs < maxofs) | |
377 { | |
378 IFLT (key, a[ofs]) | |
379 break; | |
380 /* a[hint + ofs] <= key */ | |
381 lastofs = ofs; | |
382 ofs = (ofs << 1) + 1; | |
383 if (ofs <= 0) /* int overflow */ | |
384 ofs = maxofs; | |
385 } | |
386 if (ofs > maxofs) | |
387 ofs = maxofs; | |
388 /* Translate back to offsets relative to &a[0]. */ | |
389 lastofs += hint; | |
390 ofs += hint; | |
391 } | |
392 a -= hint; | |
393 | |
394 /* Now a[lastofs] <= key < a[ofs], so key belongs somewhere to the | |
395 * right of lastofs but no farther right than ofs. Do a binary | |
396 * search, with invariant a[lastofs-1] <= key < a[ofs]. | |
397 */ | |
398 ++lastofs; | |
399 while (lastofs < ofs) | |
400 { | |
401 int m = lastofs + ((ofs - lastofs) >> 1); | |
402 | |
403 IFLT (key, a[m]) | |
404 ofs = m; /* key < a[m] */ | |
405 else | |
406 lastofs = m+1; /* a[m] <= key */ | |
407 } | |
408 return ofs; | |
409 } | |
410 | |
411 /* Conceptually a MergeState's constructor. */ | |
412 template <class T> | |
413 void | |
414 octave_sort<T>::merge_init(void) | |
415 { | |
416 ms.a = NULL; | |
417 ms.alloced = 0; | |
418 ms.n = 0; | |
419 ms.min_gallop = MIN_GALLOP; | |
420 } | |
421 | |
422 /* Free all the temp memory owned by the MergeState. This must be called | |
423 * when you're done with a MergeState, and may be called before then if | |
424 * you want to free the temp memory early. | |
425 */ | |
426 template <class T> | |
427 void | |
428 octave_sort<T>::merge_freemem(void) | |
429 { | |
430 if (ms.a) | |
431 free (ms.a); | |
432 ms.alloced = 0; | |
433 ms.a = NULL; | |
434 } | |
435 | |
436 static inline int | |
437 roundupsize(int n) | |
438 { | |
439 unsigned int nbits = 3; | |
440 unsigned int n2 = (unsigned int)n >> 8; | |
441 | |
442 /* Round up: | |
443 * If n < 256, to a multiple of 8. | |
444 * If n < 2048, to a multiple of 64. | |
445 * If n < 16384, to a multiple of 512. | |
446 * If n < 131072, to a multiple of 4096. | |
447 * If n < 1048576, to a multiple of 32768. | |
448 * If n < 8388608, to a multiple of 262144. | |
449 * If n < 67108864, to a multiple of 2097152. | |
450 * If n < 536870912, to a multiple of 16777216. | |
451 * ... | |
452 * If n < 2**(5+3*i), to a multiple of 2**(3*i). | |
453 * | |
454 * This over-allocates proportional to the list size, making room | |
455 * for additional growth. The over-allocation is mild, but is | |
456 * enough to give linear-time amortized behavior over a long | |
457 * sequence of appends() in the presence of a poorly-performing | |
458 * system realloc() (which is a reality, e.g., across all flavors | |
459 * of Windows, with Win9x behavior being particularly bad -- and | |
460 * we've still got address space fragmentation problems on Win9x | |
461 * even with this scheme, although it requires much longer lists to | |
462 * provoke them than it used to). | |
463 */ | |
464 while (n2) { | |
465 n2 >>= 3; | |
466 nbits += 3; | |
467 } | |
468 return ((n >> nbits) + 1) << nbits; | |
469 } | |
470 | |
471 /* Ensure enough temp memory for 'need' array slots is available. | |
472 * Returns 0 on success and -1 if the memory can't be gotten. | |
473 */ | |
474 template <class T> | |
475 int | |
476 octave_sort<T>::merge_getmem(int need) | |
477 { | |
478 if (need <= ms.alloced) | |
479 return 0; | |
480 | |
481 need = roundupsize(need); | |
482 /* Don't realloc! That can cost cycles to copy the old data, but | |
483 * we don't care what's in the block. | |
484 */ | |
485 merge_freemem( ); | |
486 ms.a = (T *) malloc (need * sizeof (T)); | |
487 if (ms.a) { | |
488 ms.alloced = need; | |
489 return 0; | |
490 } | |
491 merge_freemem( ); /* reset to sane state */ | |
492 return -1; | |
493 } | |
494 | |
495 #define MERGE_GETMEM(NEED) ((NEED) <= ms.alloced ? 0 : \ | |
496 merge_getmem(NEED)) | |
497 | |
498 /* Merge the na elements starting at pa with the nb elements starting at pb | |
499 * in a stable way, in-place. na and nb must be > 0, and pa + na == pb. | |
500 * Must also have that *pb < *pa, that pa[na-1] belongs at the end of the | |
501 * merge, and should have na <= nb. See listsort.txt for more info. | |
502 * Return 0 if successful, -1 if error. | |
503 */ | |
504 template <class T> | |
505 int | |
506 octave_sort<T>::merge_lo(T *pa, int na, T *pb, int nb) | |
507 { | |
508 int k; | |
509 T *dest; | |
510 int result = -1; /* guilty until proved innocent */ | |
511 int min_gallop = ms.min_gallop; | |
512 | |
513 if (MERGE_GETMEM(na) < 0) | |
514 return -1; | |
515 memcpy(ms.a, pa, na * sizeof(T)); | |
516 dest = pa; | |
517 pa = ms.a; | |
518 | |
519 *dest++ = *pb++; | |
520 --nb; | |
521 if (nb == 0) | |
522 goto Succeed; | |
523 if (na == 1) | |
524 goto CopyB; | |
525 | |
526 for (;;) { | |
527 int acount = 0; /* # of times A won in a row */ | |
528 int bcount = 0; /* # of times B won in a row */ | |
529 | |
530 /* Do the straightforward thing until (if ever) one run | |
531 * appears to win consistently. | |
532 */ | |
533 for (;;) { | |
534 | |
535 IFLT (*pb, *pa) | |
536 { | |
537 *dest++ = *pb++; | |
538 ++bcount; | |
539 acount = 0; | |
540 --nb; | |
541 if (nb == 0) | |
542 goto Succeed; | |
543 if (bcount >= min_gallop) | |
544 break; | |
545 } | |
546 else | |
547 { | |
548 *dest++ = *pa++; | |
549 ++acount; | |
550 bcount = 0; | |
551 --na; | |
552 if (na == 1) | |
553 goto CopyB; | |
554 if (acount >= min_gallop) | |
555 break; | |
556 } | |
557 } | |
558 | |
559 /* One run is winning so consistently that galloping may | |
560 * be a huge win. So try that, and continue galloping until | |
561 * (if ever) neither run appears to be winning consistently | |
562 * anymore. | |
563 */ | |
564 ++min_gallop; | |
565 do | |
566 { | |
567 min_gallop -= min_gallop > 1; | |
568 ms.min_gallop = min_gallop; | |
569 k = gallop_right(*pb, pa, na, 0); | |
570 acount = k; | |
571 if (k) | |
572 { | |
573 if (k < 0) | |
574 goto Fail; | |
575 memcpy(dest, pa, k * sizeof(T)); | |
576 dest += k; | |
577 pa += k; | |
578 na -= k; | |
579 if (na == 1) | |
580 goto CopyB; | |
581 /* na==0 is impossible now if the comparison | |
582 * function is consistent, but we can't assume | |
583 * that it is. | |
584 */ | |
585 if (na == 0) | |
586 goto Succeed; | |
587 } | |
588 *dest++ = *pb++; | |
589 --nb; | |
590 if (nb == 0) | |
591 goto Succeed; | |
592 | |
593 k = gallop_left(*pa, pb, nb, 0); | |
594 bcount = k; | |
595 if (k) { | |
596 if (k < 0) | |
597 goto Fail; | |
598 memmove(dest, pb, k * sizeof(T)); | |
599 dest += k; | |
600 pb += k; | |
601 nb -= k; | |
602 if (nb == 0) | |
603 goto Succeed; | |
604 } | |
605 *dest++ = *pa++; | |
606 --na; | |
607 if (na == 1) | |
608 goto CopyB; | |
609 } while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP); | |
610 ++min_gallop; /* penalize it for leaving galloping mode */ | |
611 ms.min_gallop = min_gallop; | |
612 } | |
613 Succeed: | |
614 result = 0; | |
615 Fail: | |
616 if (na) | |
617 memcpy(dest, pa, na * sizeof(T)); | |
618 return result; | |
619 CopyB: | |
620 /* The last element of pa belongs at the end of the merge. */ | |
621 memmove(dest, pb, nb * sizeof(T)); | |
622 dest[nb] = *pa; | |
623 return 0; | |
624 } | |
625 | |
626 /* Merge the na elements starting at pa with the nb elements starting at pb | |
627 * in a stable way, in-place. na and nb must be > 0, and pa + na == pb. | |
628 * Must also have that *pb < *pa, that pa[na-1] belongs at the end of the | |
629 * merge, and should have na >= nb. See listsort.txt for more info. | |
630 * Return 0 if successful, -1 if error. | |
631 */ | |
632 template <class T> | |
633 int | |
634 octave_sort<T>::merge_hi(T *pa, int na, T *pb, int nb) | |
635 { | |
636 int k; | |
637 T *dest; | |
638 int result = -1; /* guilty until proved innocent */ | |
639 T *basea; | |
640 T *baseb; | |
641 int min_gallop = ms.min_gallop; | |
642 | |
643 if (MERGE_GETMEM(nb) < 0) | |
644 return -1; | |
645 dest = pb + nb - 1; | |
646 memcpy(ms.a, pb, nb * sizeof(T)); | |
647 basea = pa; | |
648 baseb = ms.a; | |
649 pb = ms.a + nb - 1; | |
650 pa += na - 1; | |
651 | |
652 *dest-- = *pa--; | |
653 --na; | |
654 if (na == 0) | |
655 goto Succeed; | |
656 if (nb == 1) | |
657 goto CopyA; | |
658 | |
659 for (;;) | |
660 { | |
661 int acount = 0; /* # of times A won in a row */ | |
662 int bcount = 0; /* # of times B won in a row */ | |
663 | |
664 /* Do the straightforward thing until (if ever) one run | |
665 * appears to win consistently. | |
666 */ | |
667 for (;;) | |
668 { | |
669 IFLT (*pb, *pa) | |
670 { | |
671 *dest-- = *pa--; | |
672 ++acount; | |
673 bcount = 0; | |
674 --na; | |
675 if (na == 0) | |
676 goto Succeed; | |
677 if (acount >= min_gallop) | |
678 break; | |
679 } | |
680 else | |
681 { | |
682 *dest-- = *pb--; | |
683 ++bcount; | |
684 acount = 0; | |
685 --nb; | |
686 if (nb == 1) | |
687 goto CopyA; | |
688 if (bcount >= min_gallop) | |
689 break; | |
690 } | |
691 } | |
692 | |
693 /* One run is winning so consistently that galloping may | |
694 * be a huge win. So try that, and continue galloping until | |
695 * (if ever) neither run appears to be winning consistently | |
696 * anymore. | |
697 */ | |
698 ++min_gallop; | |
699 do | |
700 { | |
701 min_gallop -= min_gallop > 1; | |
702 ms.min_gallop = min_gallop; | |
703 k = gallop_right(*pb, basea, na, na-1); | |
704 if (k < 0) | |
705 goto Fail; | |
706 k = na - k; | |
707 acount = k; | |
708 if (k) | |
709 { | |
710 dest -= k; | |
711 pa -= k; | |
712 memmove(dest+1, pa+1, k * sizeof(T )); | |
713 na -= k; | |
714 if (na == 0) | |
715 goto Succeed; | |
716 } | |
717 *dest-- = *pb--; | |
718 --nb; | |
719 if (nb == 1) | |
720 goto CopyA; | |
721 | |
722 k = gallop_left(*pa, baseb, nb, nb-1); | |
723 if (k < 0) | |
724 goto Fail; | |
725 k = nb - k; | |
726 bcount = k; | |
727 if (k) | |
728 { | |
729 dest -= k; | |
730 pb -= k; | |
731 memcpy(dest+1, pb+1, k * sizeof(T)); | |
732 nb -= k; | |
733 if (nb == 1) | |
734 goto CopyA; | |
735 /* nb==0 is impossible now if the comparison | |
736 * function is consistent, but we can't assume | |
737 * that it is. | |
738 */ | |
739 if (nb == 0) | |
740 goto Succeed; | |
741 } | |
742 *dest-- = *pa--; | |
743 --na; | |
744 if (na == 0) | |
745 goto Succeed; | |
746 } while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP); | |
747 ++min_gallop; /* penalize it for leaving galloping mode */ | |
748 ms.min_gallop = min_gallop; | |
749 } | |
750 Succeed: | |
751 result = 0; | |
752 Fail: | |
753 if (nb) | |
754 memcpy(dest-(nb-1), baseb, nb * sizeof(T)); | |
755 return result; | |
756 CopyA: | |
757 /* The first element of pb belongs at the front of the merge. */ | |
758 dest -= na; | |
759 pa -= na; | |
760 memmove(dest+1, pa+1, na * sizeof(T)); | |
761 *dest = *pb; | |
762 return 0; | |
763 } | |
764 | |
765 /* Merge the two runs at stack indices i and i+1. | |
766 * Returns 0 on success, -1 on error. | |
767 */ | |
768 template <class T> | |
769 int | |
770 octave_sort<T>::merge_at(int i) | |
771 { | |
772 T *pa, *pb; | |
773 int na, nb; | |
774 int k; | |
775 | |
776 pa = ms.pending[i].base; | |
777 na = ms.pending[i].len; | |
778 pb = ms.pending[i+1].base; | |
779 nb = ms.pending[i+1].len; | |
780 | |
781 /* Record the length of the combined runs; if i is the 3rd-last | |
782 * run now, also slide over the last run (which isn't involved | |
783 * in this merge). The current run i+1 goes away in any case. | |
784 */ | |
785 ms.pending[i].len = na + nb; | |
786 if (i == ms.n - 3) | |
787 ms.pending[i+1] = ms.pending[i+2]; | |
788 --ms.n; | |
789 | |
790 /* Where does b start in a? Elements in a before that can be | |
791 * ignored (already in place). | |
792 */ | |
793 k = gallop_right(*pb, pa, na, 0); | |
794 if (k < 0) | |
795 return -1; | |
796 pa += k; | |
797 na -= k; | |
798 if (na == 0) | |
799 return 0; | |
800 | |
801 /* Where does a end in b? Elements in b after that can be | |
802 * ignored (already in place). | |
803 */ | |
804 nb = gallop_left(pa[na-1], pb, nb, nb-1); | |
805 if (nb <= 0) | |
806 return nb; | |
807 | |
808 /* Merge what remains of the runs, using a temp array with | |
809 * min(na, nb) elements. | |
810 */ | |
811 if (na <= nb) | |
812 return merge_lo(pa, na, pb, nb); | |
813 else | |
814 return merge_hi(pa, na, pb, nb); | |
815 } | |
816 | |
817 /* Examine the stack of runs waiting to be merged, merging adjacent runs | |
818 * until the stack invariants are re-established: | |
819 * | |
820 * 1. len[-3] > len[-2] + len[-1] | |
821 * 2. len[-2] > len[-1] | |
822 * | |
823 * See listsort.txt for more info. | |
824 * | |
825 * Returns 0 on success, -1 on error. | |
826 */ | |
827 template <class T> | |
828 int | |
829 octave_sort<T>::merge_collapse(void) | |
830 { | |
831 struct s_slice *p = ms.pending; | |
832 | |
833 while (ms.n > 1) | |
834 { | |
835 int n = ms.n - 2; | |
836 if (n > 0 && p[n-1].len <= p[n].len + p[n+1].len) | |
837 { | |
838 if (p[n-1].len < p[n+1].len) | |
839 --n; | |
840 if (merge_at(n) < 0) | |
841 return -1; | |
842 } | |
843 else if (p[n].len <= p[n+1].len) | |
844 { | |
845 if (merge_at(n) < 0) | |
846 return -1; | |
847 } | |
848 else | |
849 break; | |
850 } | |
851 return 0; | |
852 } | |
853 | |
854 /* Regardless of invariants, merge all runs on the stack until only one | |
855 * remains. This is used at the end of the mergesort. | |
856 * | |
857 * Returns 0 on success, -1 on error. | |
858 */ | |
859 template <class T> | |
860 int | |
861 octave_sort<T>::merge_force_collapse(void) | |
862 { | |
863 struct s_slice *p = ms.pending; | |
864 | |
865 while (ms.n > 1) | |
866 { | |
867 int n = ms.n - 2; | |
868 if (n > 0 && p[n-1].len < p[n+1].len) | |
869 --n; | |
870 if (merge_at(n) < 0) | |
871 return -1; | |
872 } | |
873 return 0; | |
874 } | |
875 | |
876 /* Compute a good value for the minimum run length; natural runs shorter | |
877 * than this are boosted artificially via binary insertion. | |
878 * | |
879 * If n < 64, return n (it's too small to bother with fancy stuff). | |
880 * Else if n is an exact power of 2, return 32. | |
881 * Else return an int k, 32 <= k <= 64, such that n/k is close to, but | |
882 * strictly less than, an exact power of 2. | |
883 * | |
884 * See listsort.txt for more info. | |
885 */ | |
886 template <class T> | |
887 int | |
888 octave_sort<T>::merge_compute_minrun(int n) | |
889 { | |
890 int r = 0; /* becomes 1 if any 1 bits are shifted off */ | |
891 | |
892 while (n >= 64) { | |
893 r |= n & 1; | |
894 n >>= 1; | |
895 } | |
896 return n + r; | |
897 } | |
898 | |
899 template <class T> | |
900 void | |
901 octave_sort<T>::sort (T *v, int elements) | |
902 { | |
903 /* Re-initialize the Mergestate as this might be the second time called */ | |
904 ms.n = 0; | |
905 ms.min_gallop = MIN_GALLOP; | |
906 | |
907 if (elements > 1) | |
908 { | |
909 int nremaining = elements; | |
910 T *lo = v; | |
911 T *hi = v + elements; | |
912 | |
913 /* March over the array once, left to right, finding natural runs, | |
914 * and extending short natural runs to minrun elements. | |
915 */ | |
916 int minrun = merge_compute_minrun(nremaining); | |
917 do | |
918 { | |
919 int descending; | |
920 int n; | |
921 | |
922 /* Identify next run. */ | |
923 n = count_run(lo, hi, &descending); | |
924 if (n < 0) | |
925 goto fail; | |
926 if (descending) | |
927 reverse_slice(lo, lo + n); | |
928 /* If short, extend to min(minrun, nremaining). */ | |
929 if (n < minrun) | |
930 { | |
931 const int force = nremaining <= minrun ? nremaining : minrun; | |
932 binarysort(lo, lo + force, lo + n); | |
933 n = force; | |
934 } | |
935 /* Push run onto pending-runs stack, and maybe merge. */ | |
936 assert(ms.n < MAX_MERGE_PENDING); | |
937 ms.pending[ms.n].base = lo; | |
938 ms.pending[ms.n].len = n; | |
939 ++ms.n; | |
940 if (merge_collapse( ) < 0) | |
941 goto fail; | |
942 /* Advance to find next run. */ | |
943 lo += n; | |
944 nremaining -= n; | |
945 } while (nremaining); | |
946 | |
947 merge_force_collapse( ); | |
948 } | |
949 | |
950 fail: | |
951 return; | |
952 } | |
953 | |
954 /* | |
955 ;;; Local Variables: *** | |
956 ;;; mode: C++ *** | |
957 ;;; End: *** | |
958 */ |