comparison liboctave/oct-sort.cc @ 4851:047ff938b0d9

[project @ 2004-04-06 17:12:14 by jwe]
author jwe
date Tue, 06 Apr 2004 17:14:17 +0000
parents
children 7514d69b422a
comparison
equal deleted inserted replaced
4850:8cc4818a0de0 4851:047ff938b0d9
1 /*
2 Copyright (C) 2003 David Bateman
3
4 This file is part of Octave.
5
6 Octave is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
10
11 Octave is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with Octave; see the file COPYING. If not, write to the Free
18 Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19
20 Code stolen in large part from Python's, listobject.c, which itself had
21 no license header. However, thanks to Tim Peters for the parts of the
22 code I ripped-off.
23
24 As required in the Python license the short description of the changes
25 made are
26
27 * convert the sorting code in listobject.cc into a generic class,
28 replacing PyObject* with the type of the class T.
29
30 The Python license is
31
32 PSF LICENSE AGREEMENT FOR PYTHON 2.3
33 --------------------------------------
34
35 1. This LICENSE AGREEMENT is between the Python Software Foundation
36 ("PSF"), and the Individual or Organization ("Licensee") accessing and
37 otherwise using Python 2.3 software in source or binary form and its
38 associated documentation.
39
40 2. Subject to the terms and conditions of this License Agreement, PSF
41 hereby grants Licensee a nonexclusive, royalty-free, world-wide
42 license to reproduce, analyze, test, perform and/or display publicly,
43 prepare derivative works, distribute, and otherwise use Python 2.3
44 alone or in any derivative version, provided, however, that PSF's
45 License Agreement and PSF's notice of copyright, i.e., "Copyright (c)
46 2001, 2002, 2003 Python Software Foundation; All Rights Reserved" are
47 retained in Python 2.3 alone or in any derivative version prepared by
48 Licensee.
49
50 3. In the event Licensee prepares a derivative work that is based on
51 or incorporates Python 2.3 or any part thereof, and wants to make
52 the derivative work available to others as provided herein, then
53 Licensee hereby agrees to include in any such work a brief summary of
54 the changes made to Python 2.3.
55
56 4. PSF is making Python 2.3 available to Licensee on an "AS IS"
57 basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
58 IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND
59 DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
60 FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 2.3 WILL NOT
61 INFRINGE ANY THIRD PARTY RIGHTS.
62
63 5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON
64 2.3 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS
65 A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 2.3,
66 OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
67
68 6. This License Agreement will automatically terminate upon a material
69 breach of its terms and conditions.
70
71 7. Nothing in this License Agreement shall be deemed to create any
72 relationship of agency, partnership, or joint venture between PSF and
73 Licensee. This License Agreement does not grant permission to use PSF
74 trademarks or trade name in a trademark sense to endorse or promote
75 products or services of Licensee, or any third party.
76
77 8. By copying, installing or otherwise using Python 2.3, Licensee
78 agrees to be bound by the terms and conditions of this License
79 Agreement.
80 */
81
82 #ifdef HAVE_CONFIG_H
83 #include <config.h>
84 #endif
85
86 #include "oct-obj.h"
87 #include "lo-mappers.h"
88 #include "quit.h"
89 #include "oct-sort.h"
90
91 #define IFLT(a,b) if (compare == NULL ? ((a) < (b)) : compare ((a), (b)))
92
93 template <class T>
94 octave_sort<T>::octave_sort (void) : compare (NULL)
95 {
96 merge_init ( );
97 merge_getmem (1024);
98 }
99
100 template <class T>
101 octave_sort<T>::octave_sort (bool (*comp) (T, T)) : compare (comp)
102 {
103 merge_init ( );
104 merge_getmem (1024);
105 }
106
107 /* Reverse a slice of a list in place, from lo up to (exclusive) hi. */
108 template <class T>
109 void
110 octave_sort<T>::reverse_slice(T *lo, T *hi)
111 {
112 --hi;
113 while (lo < hi)
114 {
115 T t = *lo;
116 *lo = *hi;
117 *hi = t;
118 ++lo;
119 --hi;
120 }
121 }
122
123 template <class T>
124 void
125 octave_sort<T>::binarysort (T *lo, T *hi, T *start)
126 {
127 register T *l, *p, *r;
128 register T pivot;
129
130 if (lo == start)
131 ++start;
132
133 for (; start < hi; ++start)
134 {
135 /* set l to where *start belongs */
136 l = lo;
137 r = start;
138 pivot = *r;
139 /* Invariants:
140 * pivot >= all in [lo, l).
141 * pivot < all in [r, start).
142 * The second is vacuously true at the start.
143 */
144 do
145 {
146 p = l + ((r - l) >> 1);
147 IFLT (pivot, *p)
148 r = p;
149 else
150 l = p+1;
151 }
152 while (l < r);
153 /* The invariants still hold, so pivot >= all in [lo, l) and
154 pivot < all in [l, start), so pivot belongs at l. Note
155 that if there are elements equal to pivot, l points to the
156 first slot after them -- that's why this sort is stable.
157 Slide over to make room.
158 Caution: using memmove is much slower under MSVC 5;
159 we're not usually moving many slots. */
160 for (p = start; p > l; --p)
161 *p = *(p-1);
162 *l = pivot;
163 }
164
165 return;
166 }
167
168 /*
169 Return the length of the run beginning at lo, in the slice [lo, hi). lo < hi
170 is required on entry. "A run" is the longest ascending sequence, with
171
172 lo[0] <= lo[1] <= lo[2] <= ...
173
174 or the longest descending sequence, with
175
176 lo[0] > lo[1] > lo[2] > ...
177
178 Boolean *descending is set to 0 in the former case, or to 1 in the latter.
179 For its intended use in a stable mergesort, the strictness of the defn of
180 "descending" is needed so that the caller can safely reverse a descending
181 sequence without violating stability (strict > ensures there are no equal
182 elements to get out of order).
183
184 Returns -1 in case of error.
185 */
186 template <class T>
187 int
188 octave_sort<T>::count_run(T *lo, T *hi, int *descending)
189 {
190 int n;
191
192 *descending = 0;
193 ++lo;
194 if (lo == hi)
195 return 1;
196
197 n = 2;
198
199 IFLT (*lo, *(lo-1))
200 {
201 *descending = 1;
202 for (lo = lo+1; lo < hi; ++lo, ++n)
203 {
204 IFLT (*lo, *(lo-1))
205 ;
206 else
207 break;
208 }
209 }
210 else
211 {
212 for (lo = lo+1; lo < hi; ++lo, ++n)
213 {
214 IFLT (*lo, *(lo-1))
215 break;
216 }
217 }
218
219 return n;
220 }
221
222 /*
223 Locate the proper position of key in a sorted vector; if the vector contains
224 an element equal to key, return the position immediately to the left of
225 the leftmost equal element. [gallop_right() does the same except returns
226 the position to the right of the rightmost equal element (if any).]
227
228 "a" is a sorted vector with n elements, starting at a[0]. n must be > 0.
229
230 "hint" is an index at which to begin the search, 0 <= hint < n. The closer
231 hint is to the final result, the faster this runs.
232
233 The return value is the int k in 0..n such that
234
235 a[k-1] < key <= a[k]
236
237 pretending that *(a-1) is minus infinity and a[n] is plus infinity. IOW,
238 key belongs at index k; or, IOW, the first k elements of a should precede
239 key, and the last n-k should follow key.
240
241 Returns -1 on error. See listsort.txt for info on the method.
242 */
243 template <class T>
244 int
245 octave_sort<T>::gallop_left(T key, T *a, int n, int hint)
246 {
247 int ofs;
248 int lastofs;
249 int k;
250
251 a += hint;
252 lastofs = 0;
253 ofs = 1;
254 IFLT (*a, key)
255 {
256 /* a[hint] < key -- gallop right, until
257 * a[hint + lastofs] < key <= a[hint + ofs]
258 */
259 const int maxofs = n - hint; /* &a[n-1] is highest */
260 while (ofs < maxofs)
261 {
262 IFLT (a[ofs], key)
263 {
264 lastofs = ofs;
265 ofs = (ofs << 1) + 1;
266 if (ofs <= 0) /* int overflow */
267 ofs = maxofs;
268 }
269 else /* key <= a[hint + ofs] */
270 break;
271 }
272 if (ofs > maxofs)
273 ofs = maxofs;
274 /* Translate back to offsets relative to &a[0]. */
275 lastofs += hint;
276 ofs += hint;
277 }
278 else
279 {
280 /* key <= a[hint] -- gallop left, until
281 * a[hint - ofs] < key <= a[hint - lastofs]
282 */
283 const int maxofs = hint + 1; /* &a[0] is lowest */
284 while (ofs < maxofs)
285 {
286 IFLT (*(a-ofs), key)
287 break;
288 /* key <= a[hint - ofs] */
289 lastofs = ofs;
290 ofs = (ofs << 1) + 1;
291 if (ofs <= 0) /* int overflow */
292 ofs = maxofs;
293 }
294 if (ofs > maxofs)
295 ofs = maxofs;
296 /* Translate back to positive offsets relative to &a[0]. */
297 k = lastofs;
298 lastofs = hint - ofs;
299 ofs = hint - k;
300 }
301 a -= hint;
302
303 /* Now a[lastofs] < key <= a[ofs], so key belongs somewhere to the
304 * right of lastofs but no farther right than ofs. Do a binary
305 * search, with invariant a[lastofs-1] < key <= a[ofs].
306 */
307 ++lastofs;
308 while (lastofs < ofs)
309 {
310 int m = lastofs + ((ofs - lastofs) >> 1);
311
312 IFLT (a[m], key)
313 lastofs = m+1; /* a[m] < key */
314 else
315 ofs = m; /* key <= a[m] */
316 }
317 return ofs;
318 }
319
320 /*
321 Exactly like gallop_left(), except that if key already exists in a[0:n],
322 finds the position immediately to the right of the rightmost equal value.
323
324 The return value is the int k in 0..n such that
325
326 a[k-1] <= key < a[k]
327
328 or -1 if error.
329
330 The code duplication is massive, but this is enough different given that
331 we're sticking to "<" comparisons that it's much harder to follow if
332 written as one routine with yet another "left or right?" flag.
333 */
334 template <class T>
335 int
336 octave_sort<T>::gallop_right(T key, T *a, int n, int hint)
337 {
338 int ofs;
339 int lastofs;
340 int k;
341
342 a += hint;
343 lastofs = 0;
344 ofs = 1;
345 IFLT (key, *a)
346 {
347 /* key < a[hint] -- gallop left, until
348 * a[hint - ofs] <= key < a[hint - lastofs]
349 */
350 const int maxofs = hint + 1; /* &a[0] is lowest */
351 while (ofs < maxofs)
352 {
353 IFLT (key, *(a-ofs))
354 {
355 lastofs = ofs;
356 ofs = (ofs << 1) + 1;
357 if (ofs <= 0) /* int overflow */
358 ofs = maxofs;
359 }
360 else /* a[hint - ofs] <= key */
361 break;
362 }
363 if (ofs > maxofs)
364 ofs = maxofs;
365 /* Translate back to positive offsets relative to &a[0]. */
366 k = lastofs;
367 lastofs = hint - ofs;
368 ofs = hint - k;
369 }
370 else
371 {
372 /* a[hint] <= key -- gallop right, until
373 * a[hint + lastofs] <= key < a[hint + ofs]
374 */
375 const int maxofs = n - hint; /* &a[n-1] is highest */
376 while (ofs < maxofs)
377 {
378 IFLT (key, a[ofs])
379 break;
380 /* a[hint + ofs] <= key */
381 lastofs = ofs;
382 ofs = (ofs << 1) + 1;
383 if (ofs <= 0) /* int overflow */
384 ofs = maxofs;
385 }
386 if (ofs > maxofs)
387 ofs = maxofs;
388 /* Translate back to offsets relative to &a[0]. */
389 lastofs += hint;
390 ofs += hint;
391 }
392 a -= hint;
393
394 /* Now a[lastofs] <= key < a[ofs], so key belongs somewhere to the
395 * right of lastofs but no farther right than ofs. Do a binary
396 * search, with invariant a[lastofs-1] <= key < a[ofs].
397 */
398 ++lastofs;
399 while (lastofs < ofs)
400 {
401 int m = lastofs + ((ofs - lastofs) >> 1);
402
403 IFLT (key, a[m])
404 ofs = m; /* key < a[m] */
405 else
406 lastofs = m+1; /* a[m] <= key */
407 }
408 return ofs;
409 }
410
411 /* Conceptually a MergeState's constructor. */
412 template <class T>
413 void
414 octave_sort<T>::merge_init(void)
415 {
416 ms.a = NULL;
417 ms.alloced = 0;
418 ms.n = 0;
419 ms.min_gallop = MIN_GALLOP;
420 }
421
422 /* Free all the temp memory owned by the MergeState. This must be called
423 * when you're done with a MergeState, and may be called before then if
424 * you want to free the temp memory early.
425 */
426 template <class T>
427 void
428 octave_sort<T>::merge_freemem(void)
429 {
430 if (ms.a)
431 free (ms.a);
432 ms.alloced = 0;
433 ms.a = NULL;
434 }
435
436 static inline int
437 roundupsize(int n)
438 {
439 unsigned int nbits = 3;
440 unsigned int n2 = (unsigned int)n >> 8;
441
442 /* Round up:
443 * If n < 256, to a multiple of 8.
444 * If n < 2048, to a multiple of 64.
445 * If n < 16384, to a multiple of 512.
446 * If n < 131072, to a multiple of 4096.
447 * If n < 1048576, to a multiple of 32768.
448 * If n < 8388608, to a multiple of 262144.
449 * If n < 67108864, to a multiple of 2097152.
450 * If n < 536870912, to a multiple of 16777216.
451 * ...
452 * If n < 2**(5+3*i), to a multiple of 2**(3*i).
453 *
454 * This over-allocates proportional to the list size, making room
455 * for additional growth. The over-allocation is mild, but is
456 * enough to give linear-time amortized behavior over a long
457 * sequence of appends() in the presence of a poorly-performing
458 * system realloc() (which is a reality, e.g., across all flavors
459 * of Windows, with Win9x behavior being particularly bad -- and
460 * we've still got address space fragmentation problems on Win9x
461 * even with this scheme, although it requires much longer lists to
462 * provoke them than it used to).
463 */
464 while (n2) {
465 n2 >>= 3;
466 nbits += 3;
467 }
468 return ((n >> nbits) + 1) << nbits;
469 }
470
471 /* Ensure enough temp memory for 'need' array slots is available.
472 * Returns 0 on success and -1 if the memory can't be gotten.
473 */
474 template <class T>
475 int
476 octave_sort<T>::merge_getmem(int need)
477 {
478 if (need <= ms.alloced)
479 return 0;
480
481 need = roundupsize(need);
482 /* Don't realloc! That can cost cycles to copy the old data, but
483 * we don't care what's in the block.
484 */
485 merge_freemem( );
486 ms.a = (T *) malloc (need * sizeof (T));
487 if (ms.a) {
488 ms.alloced = need;
489 return 0;
490 }
491 merge_freemem( ); /* reset to sane state */
492 return -1;
493 }
494
495 #define MERGE_GETMEM(NEED) ((NEED) <= ms.alloced ? 0 : \
496 merge_getmem(NEED))
497
498 /* Merge the na elements starting at pa with the nb elements starting at pb
499 * in a stable way, in-place. na and nb must be > 0, and pa + na == pb.
500 * Must also have that *pb < *pa, that pa[na-1] belongs at the end of the
501 * merge, and should have na <= nb. See listsort.txt for more info.
502 * Return 0 if successful, -1 if error.
503 */
504 template <class T>
505 int
506 octave_sort<T>::merge_lo(T *pa, int na, T *pb, int nb)
507 {
508 int k;
509 T *dest;
510 int result = -1; /* guilty until proved innocent */
511 int min_gallop = ms.min_gallop;
512
513 if (MERGE_GETMEM(na) < 0)
514 return -1;
515 memcpy(ms.a, pa, na * sizeof(T));
516 dest = pa;
517 pa = ms.a;
518
519 *dest++ = *pb++;
520 --nb;
521 if (nb == 0)
522 goto Succeed;
523 if (na == 1)
524 goto CopyB;
525
526 for (;;) {
527 int acount = 0; /* # of times A won in a row */
528 int bcount = 0; /* # of times B won in a row */
529
530 /* Do the straightforward thing until (if ever) one run
531 * appears to win consistently.
532 */
533 for (;;) {
534
535 IFLT (*pb, *pa)
536 {
537 *dest++ = *pb++;
538 ++bcount;
539 acount = 0;
540 --nb;
541 if (nb == 0)
542 goto Succeed;
543 if (bcount >= min_gallop)
544 break;
545 }
546 else
547 {
548 *dest++ = *pa++;
549 ++acount;
550 bcount = 0;
551 --na;
552 if (na == 1)
553 goto CopyB;
554 if (acount >= min_gallop)
555 break;
556 }
557 }
558
559 /* One run is winning so consistently that galloping may
560 * be a huge win. So try that, and continue galloping until
561 * (if ever) neither run appears to be winning consistently
562 * anymore.
563 */
564 ++min_gallop;
565 do
566 {
567 min_gallop -= min_gallop > 1;
568 ms.min_gallop = min_gallop;
569 k = gallop_right(*pb, pa, na, 0);
570 acount = k;
571 if (k)
572 {
573 if (k < 0)
574 goto Fail;
575 memcpy(dest, pa, k * sizeof(T));
576 dest += k;
577 pa += k;
578 na -= k;
579 if (na == 1)
580 goto CopyB;
581 /* na==0 is impossible now if the comparison
582 * function is consistent, but we can't assume
583 * that it is.
584 */
585 if (na == 0)
586 goto Succeed;
587 }
588 *dest++ = *pb++;
589 --nb;
590 if (nb == 0)
591 goto Succeed;
592
593 k = gallop_left(*pa, pb, nb, 0);
594 bcount = k;
595 if (k) {
596 if (k < 0)
597 goto Fail;
598 memmove(dest, pb, k * sizeof(T));
599 dest += k;
600 pb += k;
601 nb -= k;
602 if (nb == 0)
603 goto Succeed;
604 }
605 *dest++ = *pa++;
606 --na;
607 if (na == 1)
608 goto CopyB;
609 } while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP);
610 ++min_gallop; /* penalize it for leaving galloping mode */
611 ms.min_gallop = min_gallop;
612 }
613 Succeed:
614 result = 0;
615 Fail:
616 if (na)
617 memcpy(dest, pa, na * sizeof(T));
618 return result;
619 CopyB:
620 /* The last element of pa belongs at the end of the merge. */
621 memmove(dest, pb, nb * sizeof(T));
622 dest[nb] = *pa;
623 return 0;
624 }
625
626 /* Merge the na elements starting at pa with the nb elements starting at pb
627 * in a stable way, in-place. na and nb must be > 0, and pa + na == pb.
628 * Must also have that *pb < *pa, that pa[na-1] belongs at the end of the
629 * merge, and should have na >= nb. See listsort.txt for more info.
630 * Return 0 if successful, -1 if error.
631 */
632 template <class T>
633 int
634 octave_sort<T>::merge_hi(T *pa, int na, T *pb, int nb)
635 {
636 int k;
637 T *dest;
638 int result = -1; /* guilty until proved innocent */
639 T *basea;
640 T *baseb;
641 int min_gallop = ms.min_gallop;
642
643 if (MERGE_GETMEM(nb) < 0)
644 return -1;
645 dest = pb + nb - 1;
646 memcpy(ms.a, pb, nb * sizeof(T));
647 basea = pa;
648 baseb = ms.a;
649 pb = ms.a + nb - 1;
650 pa += na - 1;
651
652 *dest-- = *pa--;
653 --na;
654 if (na == 0)
655 goto Succeed;
656 if (nb == 1)
657 goto CopyA;
658
659 for (;;)
660 {
661 int acount = 0; /* # of times A won in a row */
662 int bcount = 0; /* # of times B won in a row */
663
664 /* Do the straightforward thing until (if ever) one run
665 * appears to win consistently.
666 */
667 for (;;)
668 {
669 IFLT (*pb, *pa)
670 {
671 *dest-- = *pa--;
672 ++acount;
673 bcount = 0;
674 --na;
675 if (na == 0)
676 goto Succeed;
677 if (acount >= min_gallop)
678 break;
679 }
680 else
681 {
682 *dest-- = *pb--;
683 ++bcount;
684 acount = 0;
685 --nb;
686 if (nb == 1)
687 goto CopyA;
688 if (bcount >= min_gallop)
689 break;
690 }
691 }
692
693 /* One run is winning so consistently that galloping may
694 * be a huge win. So try that, and continue galloping until
695 * (if ever) neither run appears to be winning consistently
696 * anymore.
697 */
698 ++min_gallop;
699 do
700 {
701 min_gallop -= min_gallop > 1;
702 ms.min_gallop = min_gallop;
703 k = gallop_right(*pb, basea, na, na-1);
704 if (k < 0)
705 goto Fail;
706 k = na - k;
707 acount = k;
708 if (k)
709 {
710 dest -= k;
711 pa -= k;
712 memmove(dest+1, pa+1, k * sizeof(T ));
713 na -= k;
714 if (na == 0)
715 goto Succeed;
716 }
717 *dest-- = *pb--;
718 --nb;
719 if (nb == 1)
720 goto CopyA;
721
722 k = gallop_left(*pa, baseb, nb, nb-1);
723 if (k < 0)
724 goto Fail;
725 k = nb - k;
726 bcount = k;
727 if (k)
728 {
729 dest -= k;
730 pb -= k;
731 memcpy(dest+1, pb+1, k * sizeof(T));
732 nb -= k;
733 if (nb == 1)
734 goto CopyA;
735 /* nb==0 is impossible now if the comparison
736 * function is consistent, but we can't assume
737 * that it is.
738 */
739 if (nb == 0)
740 goto Succeed;
741 }
742 *dest-- = *pa--;
743 --na;
744 if (na == 0)
745 goto Succeed;
746 } while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP);
747 ++min_gallop; /* penalize it for leaving galloping mode */
748 ms.min_gallop = min_gallop;
749 }
750 Succeed:
751 result = 0;
752 Fail:
753 if (nb)
754 memcpy(dest-(nb-1), baseb, nb * sizeof(T));
755 return result;
756 CopyA:
757 /* The first element of pb belongs at the front of the merge. */
758 dest -= na;
759 pa -= na;
760 memmove(dest+1, pa+1, na * sizeof(T));
761 *dest = *pb;
762 return 0;
763 }
764
765 /* Merge the two runs at stack indices i and i+1.
766 * Returns 0 on success, -1 on error.
767 */
768 template <class T>
769 int
770 octave_sort<T>::merge_at(int i)
771 {
772 T *pa, *pb;
773 int na, nb;
774 int k;
775
776 pa = ms.pending[i].base;
777 na = ms.pending[i].len;
778 pb = ms.pending[i+1].base;
779 nb = ms.pending[i+1].len;
780
781 /* Record the length of the combined runs; if i is the 3rd-last
782 * run now, also slide over the last run (which isn't involved
783 * in this merge). The current run i+1 goes away in any case.
784 */
785 ms.pending[i].len = na + nb;
786 if (i == ms.n - 3)
787 ms.pending[i+1] = ms.pending[i+2];
788 --ms.n;
789
790 /* Where does b start in a? Elements in a before that can be
791 * ignored (already in place).
792 */
793 k = gallop_right(*pb, pa, na, 0);
794 if (k < 0)
795 return -1;
796 pa += k;
797 na -= k;
798 if (na == 0)
799 return 0;
800
801 /* Where does a end in b? Elements in b after that can be
802 * ignored (already in place).
803 */
804 nb = gallop_left(pa[na-1], pb, nb, nb-1);
805 if (nb <= 0)
806 return nb;
807
808 /* Merge what remains of the runs, using a temp array with
809 * min(na, nb) elements.
810 */
811 if (na <= nb)
812 return merge_lo(pa, na, pb, nb);
813 else
814 return merge_hi(pa, na, pb, nb);
815 }
816
817 /* Examine the stack of runs waiting to be merged, merging adjacent runs
818 * until the stack invariants are re-established:
819 *
820 * 1. len[-3] > len[-2] + len[-1]
821 * 2. len[-2] > len[-1]
822 *
823 * See listsort.txt for more info.
824 *
825 * Returns 0 on success, -1 on error.
826 */
827 template <class T>
828 int
829 octave_sort<T>::merge_collapse(void)
830 {
831 struct s_slice *p = ms.pending;
832
833 while (ms.n > 1)
834 {
835 int n = ms.n - 2;
836 if (n > 0 && p[n-1].len <= p[n].len + p[n+1].len)
837 {
838 if (p[n-1].len < p[n+1].len)
839 --n;
840 if (merge_at(n) < 0)
841 return -1;
842 }
843 else if (p[n].len <= p[n+1].len)
844 {
845 if (merge_at(n) < 0)
846 return -1;
847 }
848 else
849 break;
850 }
851 return 0;
852 }
853
854 /* Regardless of invariants, merge all runs on the stack until only one
855 * remains. This is used at the end of the mergesort.
856 *
857 * Returns 0 on success, -1 on error.
858 */
859 template <class T>
860 int
861 octave_sort<T>::merge_force_collapse(void)
862 {
863 struct s_slice *p = ms.pending;
864
865 while (ms.n > 1)
866 {
867 int n = ms.n - 2;
868 if (n > 0 && p[n-1].len < p[n+1].len)
869 --n;
870 if (merge_at(n) < 0)
871 return -1;
872 }
873 return 0;
874 }
875
876 /* Compute a good value for the minimum run length; natural runs shorter
877 * than this are boosted artificially via binary insertion.
878 *
879 * If n < 64, return n (it's too small to bother with fancy stuff).
880 * Else if n is an exact power of 2, return 32.
881 * Else return an int k, 32 <= k <= 64, such that n/k is close to, but
882 * strictly less than, an exact power of 2.
883 *
884 * See listsort.txt for more info.
885 */
886 template <class T>
887 int
888 octave_sort<T>::merge_compute_minrun(int n)
889 {
890 int r = 0; /* becomes 1 if any 1 bits are shifted off */
891
892 while (n >= 64) {
893 r |= n & 1;
894 n >>= 1;
895 }
896 return n + r;
897 }
898
899 template <class T>
900 void
901 octave_sort<T>::sort (T *v, int elements)
902 {
903 /* Re-initialize the Mergestate as this might be the second time called */
904 ms.n = 0;
905 ms.min_gallop = MIN_GALLOP;
906
907 if (elements > 1)
908 {
909 int nremaining = elements;
910 T *lo = v;
911 T *hi = v + elements;
912
913 /* March over the array once, left to right, finding natural runs,
914 * and extending short natural runs to minrun elements.
915 */
916 int minrun = merge_compute_minrun(nremaining);
917 do
918 {
919 int descending;
920 int n;
921
922 /* Identify next run. */
923 n = count_run(lo, hi, &descending);
924 if (n < 0)
925 goto fail;
926 if (descending)
927 reverse_slice(lo, lo + n);
928 /* If short, extend to min(minrun, nremaining). */
929 if (n < minrun)
930 {
931 const int force = nremaining <= minrun ? nremaining : minrun;
932 binarysort(lo, lo + force, lo + n);
933 n = force;
934 }
935 /* Push run onto pending-runs stack, and maybe merge. */
936 assert(ms.n < MAX_MERGE_PENDING);
937 ms.pending[ms.n].base = lo;
938 ms.pending[ms.n].len = n;
939 ++ms.n;
940 if (merge_collapse( ) < 0)
941 goto fail;
942 /* Advance to find next run. */
943 lo += n;
944 nremaining -= n;
945 } while (nremaining);
946
947 merge_force_collapse( );
948 }
949
950 fail:
951 return;
952 }
953
954 /*
955 ;;; Local Variables: ***
956 ;;; mode: C++ ***
957 ;;; End: ***
958 */