4851
|
1 /* |
7017
|
2 Copyright (C) 2003, 2004, 2005, 2006, 2007 David Bateman |
4851
|
3 |
|
4 This file is part of Octave. |
|
5 |
|
6 Octave is free software; you can redistribute it and/or modify it |
|
7 under the terms of the GNU General Public License as published by the |
7016
|
8 Free Software Foundation; either version 3 of the License, or (at your |
|
9 option) any later version. |
4851
|
10 |
|
11 Octave is distributed in the hope that it will be useful, but WITHOUT |
|
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
|
14 for more details. |
|
15 |
|
16 You should have received a copy of the GNU General Public License |
7016
|
17 along with Octave; see the file COPYING. If not, see |
|
18 <http://www.gnu.org/licenses/>. |
4851
|
19 |
|
20 Code stolen in large part from Python's, listobject.c, which itself had |
|
21 no license header. However, thanks to Tim Peters for the parts of the |
|
22 code I ripped-off. |
|
23 |
|
24 As required in the Python license the short description of the changes |
|
25 made are |
|
26 |
|
27 * convert the sorting code in listobject.cc into a generic class, |
|
28 replacing PyObject* with the type of the class T. |
|
29 |
|
30 The Python license is |
|
31 |
|
32 PSF LICENSE AGREEMENT FOR PYTHON 2.3 |
|
33 -------------------------------------- |
|
34 |
|
35 1. This LICENSE AGREEMENT is between the Python Software Foundation |
|
36 ("PSF"), and the Individual or Organization ("Licensee") accessing and |
|
37 otherwise using Python 2.3 software in source or binary form and its |
|
38 associated documentation. |
|
39 |
|
40 2. Subject to the terms and conditions of this License Agreement, PSF |
|
41 hereby grants Licensee a nonexclusive, royalty-free, world-wide |
|
42 license to reproduce, analyze, test, perform and/or display publicly, |
|
43 prepare derivative works, distribute, and otherwise use Python 2.3 |
|
44 alone or in any derivative version, provided, however, that PSF's |
|
45 License Agreement and PSF's notice of copyright, i.e., "Copyright (c) |
|
46 2001, 2002, 2003 Python Software Foundation; All Rights Reserved" are |
|
47 retained in Python 2.3 alone or in any derivative version prepared by |
|
48 Licensee. |
|
49 |
|
50 3. In the event Licensee prepares a derivative work that is based on |
|
51 or incorporates Python 2.3 or any part thereof, and wants to make |
|
52 the derivative work available to others as provided herein, then |
|
53 Licensee hereby agrees to include in any such work a brief summary of |
|
54 the changes made to Python 2.3. |
|
55 |
|
56 4. PSF is making Python 2.3 available to Licensee on an "AS IS" |
|
57 basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR |
|
58 IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND |
|
59 DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS |
|
60 FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 2.3 WILL NOT |
|
61 INFRINGE ANY THIRD PARTY RIGHTS. |
|
62 |
|
63 5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON |
|
64 2.3 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS |
|
65 A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 2.3, |
|
66 OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. |
|
67 |
|
68 6. This License Agreement will automatically terminate upon a material |
|
69 breach of its terms and conditions. |
|
70 |
|
71 7. Nothing in this License Agreement shall be deemed to create any |
|
72 relationship of agency, partnership, or joint venture between PSF and |
|
73 Licensee. This License Agreement does not grant permission to use PSF |
|
74 trademarks or trade name in a trademark sense to endorse or promote |
|
75 products or services of Licensee, or any third party. |
|
76 |
|
77 8. By copying, installing or otherwise using Python 2.3, Licensee |
|
78 agrees to be bound by the terms and conditions of this License |
|
79 Agreement. |
|
80 */ |
|
81 |
|
82 #ifdef HAVE_CONFIG_H |
|
83 #include <config.h> |
|
84 #endif |
|
85 |
5883
|
86 #include <cassert> |
6482
|
87 #include <cstdlib> |
7480
|
88 #include <cstring> |
5883
|
89 |
4851
|
90 #include "lo-mappers.h" |
|
91 #include "quit.h" |
|
92 #include "oct-sort.h" |
|
93 |
7433
|
94 #ifndef IFLT |
7234
|
95 #define IFLT(a,b) if (compare ? compare ((a), (b)) : ((a) < (b))) |
7433
|
96 #endif |
4851
|
97 |
|
98 template <class T> |
7234
|
99 octave_sort<T>::octave_sort (void) : compare (0) |
4851
|
100 { |
7234
|
101 merge_init (); |
4851
|
102 merge_getmem (1024); |
|
103 } |
|
104 |
|
105 template <class T> |
|
106 octave_sort<T>::octave_sort (bool (*comp) (T, T)) : compare (comp) |
|
107 { |
4998
|
108 merge_init (); |
4851
|
109 merge_getmem (1024); |
|
110 } |
|
111 |
|
112 /* Reverse a slice of a list in place, from lo up to (exclusive) hi. */ |
|
113 template <class T> |
|
114 void |
7234
|
115 octave_sort<T>::reverse_slice (T *lo, T *hi) |
4851
|
116 { |
|
117 --hi; |
|
118 while (lo < hi) |
|
119 { |
|
120 T t = *lo; |
|
121 *lo = *hi; |
|
122 *hi = t; |
|
123 ++lo; |
|
124 --hi; |
|
125 } |
|
126 } |
|
127 |
|
128 template <class T> |
|
129 void |
|
130 octave_sort<T>::binarysort (T *lo, T *hi, T *start) |
|
131 { |
6959
|
132 T *l, *p, *r; |
|
133 T pivot; |
4851
|
134 |
|
135 if (lo == start) |
|
136 ++start; |
|
137 |
|
138 for (; start < hi; ++start) |
|
139 { |
|
140 /* set l to where *start belongs */ |
|
141 l = lo; |
|
142 r = start; |
|
143 pivot = *r; |
|
144 /* Invariants: |
|
145 * pivot >= all in [lo, l). |
|
146 * pivot < all in [r, start). |
|
147 * The second is vacuously true at the start. |
|
148 */ |
|
149 do |
|
150 { |
|
151 p = l + ((r - l) >> 1); |
|
152 IFLT (pivot, *p) |
|
153 r = p; |
|
154 else |
|
155 l = p+1; |
|
156 } |
|
157 while (l < r); |
|
158 /* The invariants still hold, so pivot >= all in [lo, l) and |
|
159 pivot < all in [l, start), so pivot belongs at l. Note |
|
160 that if there are elements equal to pivot, l points to the |
|
161 first slot after them -- that's why this sort is stable. |
|
162 Slide over to make room. |
|
163 Caution: using memmove is much slower under MSVC 5; |
|
164 we're not usually moving many slots. */ |
|
165 for (p = start; p > l; --p) |
|
166 *p = *(p-1); |
|
167 *l = pivot; |
|
168 } |
|
169 |
|
170 return; |
|
171 } |
|
172 |
|
173 /* |
|
174 Return the length of the run beginning at lo, in the slice [lo, hi). lo < hi |
|
175 is required on entry. "A run" is the longest ascending sequence, with |
|
176 |
|
177 lo[0] <= lo[1] <= lo[2] <= ... |
|
178 |
|
179 or the longest descending sequence, with |
|
180 |
|
181 lo[0] > lo[1] > lo[2] > ... |
|
182 |
|
183 Boolean *descending is set to 0 in the former case, or to 1 in the latter. |
|
184 For its intended use in a stable mergesort, the strictness of the defn of |
|
185 "descending" is needed so that the caller can safely reverse a descending |
|
186 sequence without violating stability (strict > ensures there are no equal |
|
187 elements to get out of order). |
|
188 |
|
189 Returns -1 in case of error. |
|
190 */ |
|
191 template <class T> |
7433
|
192 octave_idx_type |
7234
|
193 octave_sort<T>::count_run (T *lo, T *hi, int *descending) |
4851
|
194 { |
7433
|
195 octave_idx_type n; |
4851
|
196 |
|
197 *descending = 0; |
|
198 ++lo; |
|
199 if (lo == hi) |
|
200 return 1; |
|
201 |
|
202 n = 2; |
|
203 |
|
204 IFLT (*lo, *(lo-1)) |
|
205 { |
|
206 *descending = 1; |
|
207 for (lo = lo+1; lo < hi; ++lo, ++n) |
|
208 { |
|
209 IFLT (*lo, *(lo-1)) |
|
210 ; |
|
211 else |
|
212 break; |
|
213 } |
|
214 } |
|
215 else |
|
216 { |
|
217 for (lo = lo+1; lo < hi; ++lo, ++n) |
|
218 { |
|
219 IFLT (*lo, *(lo-1)) |
|
220 break; |
|
221 } |
|
222 } |
|
223 |
|
224 return n; |
|
225 } |
|
226 |
|
227 /* |
|
228 Locate the proper position of key in a sorted vector; if the vector contains |
|
229 an element equal to key, return the position immediately to the left of |
|
230 the leftmost equal element. [gallop_right() does the same except returns |
|
231 the position to the right of the rightmost equal element (if any).] |
|
232 |
|
233 "a" is a sorted vector with n elements, starting at a[0]. n must be > 0. |
|
234 |
|
235 "hint" is an index at which to begin the search, 0 <= hint < n. The closer |
|
236 hint is to the final result, the faster this runs. |
|
237 |
|
238 The return value is the int k in 0..n such that |
|
239 |
|
240 a[k-1] < key <= a[k] |
|
241 |
|
242 pretending that *(a-1) is minus infinity and a[n] is plus infinity. IOW, |
|
243 key belongs at index k; or, IOW, the first k elements of a should precede |
|
244 key, and the last n-k should follow key. |
|
245 |
|
246 Returns -1 on error. See listsort.txt for info on the method. |
|
247 */ |
|
248 template <class T> |
7433
|
249 octave_idx_type |
|
250 octave_sort<T>::gallop_left (T key, T *a, octave_idx_type n, octave_idx_type hint) |
4851
|
251 { |
7433
|
252 octave_idx_type ofs; |
|
253 octave_idx_type lastofs; |
|
254 octave_idx_type k; |
4851
|
255 |
|
256 a += hint; |
|
257 lastofs = 0; |
|
258 ofs = 1; |
|
259 IFLT (*a, key) |
|
260 { |
|
261 /* a[hint] < key -- gallop right, until |
|
262 * a[hint + lastofs] < key <= a[hint + ofs] |
|
263 */ |
7433
|
264 const octave_idx_type maxofs = n - hint; /* &a[n-1] is highest */ |
4851
|
265 while (ofs < maxofs) |
|
266 { |
|
267 IFLT (a[ofs], key) |
|
268 { |
|
269 lastofs = ofs; |
|
270 ofs = (ofs << 1) + 1; |
|
271 if (ofs <= 0) /* int overflow */ |
|
272 ofs = maxofs; |
|
273 } |
|
274 else /* key <= a[hint + ofs] */ |
|
275 break; |
|
276 } |
|
277 if (ofs > maxofs) |
|
278 ofs = maxofs; |
|
279 /* Translate back to offsets relative to &a[0]. */ |
|
280 lastofs += hint; |
|
281 ofs += hint; |
|
282 } |
|
283 else |
|
284 { |
|
285 /* key <= a[hint] -- gallop left, until |
|
286 * a[hint - ofs] < key <= a[hint - lastofs] |
|
287 */ |
7433
|
288 const octave_idx_type maxofs = hint + 1; /* &a[0] is lowest */ |
4851
|
289 while (ofs < maxofs) |
|
290 { |
|
291 IFLT (*(a-ofs), key) |
|
292 break; |
|
293 /* key <= a[hint - ofs] */ |
|
294 lastofs = ofs; |
|
295 ofs = (ofs << 1) + 1; |
|
296 if (ofs <= 0) /* int overflow */ |
|
297 ofs = maxofs; |
|
298 } |
|
299 if (ofs > maxofs) |
|
300 ofs = maxofs; |
|
301 /* Translate back to positive offsets relative to &a[0]. */ |
|
302 k = lastofs; |
|
303 lastofs = hint - ofs; |
|
304 ofs = hint - k; |
|
305 } |
|
306 a -= hint; |
|
307 |
|
308 /* Now a[lastofs] < key <= a[ofs], so key belongs somewhere to the |
|
309 * right of lastofs but no farther right than ofs. Do a binary |
|
310 * search, with invariant a[lastofs-1] < key <= a[ofs]. |
|
311 */ |
|
312 ++lastofs; |
|
313 while (lastofs < ofs) |
|
314 { |
7433
|
315 octave_idx_type m = lastofs + ((ofs - lastofs) >> 1); |
4851
|
316 |
|
317 IFLT (a[m], key) |
|
318 lastofs = m+1; /* a[m] < key */ |
|
319 else |
|
320 ofs = m; /* key <= a[m] */ |
|
321 } |
7234
|
322 |
4851
|
323 return ofs; |
|
324 } |
|
325 |
|
326 /* |
|
327 Exactly like gallop_left(), except that if key already exists in a[0:n], |
|
328 finds the position immediately to the right of the rightmost equal value. |
|
329 |
|
330 The return value is the int k in 0..n such that |
|
331 |
|
332 a[k-1] <= key < a[k] |
|
333 |
|
334 or -1 if error. |
|
335 |
|
336 The code duplication is massive, but this is enough different given that |
|
337 we're sticking to "<" comparisons that it's much harder to follow if |
|
338 written as one routine with yet another "left or right?" flag. |
|
339 */ |
|
340 template <class T> |
7433
|
341 octave_idx_type |
|
342 octave_sort<T>::gallop_right (T key, T *a, octave_idx_type n, octave_idx_type hint) |
4851
|
343 { |
7433
|
344 octave_idx_type ofs; |
|
345 octave_idx_type lastofs; |
|
346 octave_idx_type k; |
4851
|
347 |
|
348 a += hint; |
|
349 lastofs = 0; |
|
350 ofs = 1; |
|
351 IFLT (key, *a) |
|
352 { |
|
353 /* key < a[hint] -- gallop left, until |
|
354 * a[hint - ofs] <= key < a[hint - lastofs] |
|
355 */ |
7433
|
356 const octave_idx_type maxofs = hint + 1; /* &a[0] is lowest */ |
4851
|
357 while (ofs < maxofs) |
|
358 { |
|
359 IFLT (key, *(a-ofs)) |
|
360 { |
|
361 lastofs = ofs; |
|
362 ofs = (ofs << 1) + 1; |
|
363 if (ofs <= 0) /* int overflow */ |
|
364 ofs = maxofs; |
|
365 } |
|
366 else /* a[hint - ofs] <= key */ |
|
367 break; |
|
368 } |
|
369 if (ofs > maxofs) |
|
370 ofs = maxofs; |
|
371 /* Translate back to positive offsets relative to &a[0]. */ |
|
372 k = lastofs; |
|
373 lastofs = hint - ofs; |
|
374 ofs = hint - k; |
|
375 } |
|
376 else |
|
377 { |
|
378 /* a[hint] <= key -- gallop right, until |
|
379 * a[hint + lastofs] <= key < a[hint + ofs] |
|
380 */ |
7433
|
381 const octave_idx_type maxofs = n - hint; /* &a[n-1] is highest */ |
4851
|
382 while (ofs < maxofs) |
|
383 { |
|
384 IFLT (key, a[ofs]) |
|
385 break; |
|
386 /* a[hint + ofs] <= key */ |
|
387 lastofs = ofs; |
|
388 ofs = (ofs << 1) + 1; |
|
389 if (ofs <= 0) /* int overflow */ |
|
390 ofs = maxofs; |
|
391 } |
|
392 if (ofs > maxofs) |
|
393 ofs = maxofs; |
|
394 /* Translate back to offsets relative to &a[0]. */ |
|
395 lastofs += hint; |
|
396 ofs += hint; |
|
397 } |
|
398 a -= hint; |
|
399 |
|
400 /* Now a[lastofs] <= key < a[ofs], so key belongs somewhere to the |
|
401 * right of lastofs but no farther right than ofs. Do a binary |
|
402 * search, with invariant a[lastofs-1] <= key < a[ofs]. |
|
403 */ |
|
404 ++lastofs; |
|
405 while (lastofs < ofs) |
|
406 { |
7433
|
407 octave_idx_type m = lastofs + ((ofs - lastofs) >> 1); |
4851
|
408 |
|
409 IFLT (key, a[m]) |
|
410 ofs = m; /* key < a[m] */ |
|
411 else |
|
412 lastofs = m+1; /* a[m] <= key */ |
|
413 } |
7234
|
414 |
4851
|
415 return ofs; |
|
416 } |
|
417 |
|
418 /* Conceptually a MergeState's constructor. */ |
|
419 template <class T> |
|
420 void |
7234
|
421 octave_sort<T>::merge_init (void) |
4851
|
422 { |
7234
|
423 ms.a = 0; |
4851
|
424 ms.alloced = 0; |
|
425 ms.n = 0; |
|
426 ms.min_gallop = MIN_GALLOP; |
|
427 } |
|
428 |
|
429 /* Free all the temp memory owned by the MergeState. This must be called |
|
430 * when you're done with a MergeState, and may be called before then if |
|
431 * you want to free the temp memory early. |
|
432 */ |
|
433 template <class T> |
|
434 void |
7234
|
435 octave_sort<T>::merge_freemem (void) |
4851
|
436 { |
|
437 if (ms.a) |
|
438 free (ms.a); |
|
439 ms.alloced = 0; |
7234
|
440 ms.a = 0; |
4851
|
441 } |
|
442 |
7433
|
443 static inline octave_idx_type |
|
444 roundupsize (octave_idx_type n) |
4851
|
445 { |
|
446 unsigned int nbits = 3; |
7433
|
447 octave_idx_type n2 = static_cast<octave_idx_type> (n) >> 8; |
4851
|
448 |
|
449 /* Round up: |
|
450 * If n < 256, to a multiple of 8. |
|
451 * If n < 2048, to a multiple of 64. |
|
452 * If n < 16384, to a multiple of 512. |
|
453 * If n < 131072, to a multiple of 4096. |
|
454 * If n < 1048576, to a multiple of 32768. |
|
455 * If n < 8388608, to a multiple of 262144. |
|
456 * If n < 67108864, to a multiple of 2097152. |
|
457 * If n < 536870912, to a multiple of 16777216. |
|
458 * ... |
|
459 * If n < 2**(5+3*i), to a multiple of 2**(3*i). |
|
460 * |
|
461 * This over-allocates proportional to the list size, making room |
|
462 * for additional growth. The over-allocation is mild, but is |
|
463 * enough to give linear-time amortized behavior over a long |
|
464 * sequence of appends() in the presence of a poorly-performing |
|
465 * system realloc() (which is a reality, e.g., across all flavors |
|
466 * of Windows, with Win9x behavior being particularly bad -- and |
|
467 * we've still got address space fragmentation problems on Win9x |
|
468 * even with this scheme, although it requires much longer lists to |
|
469 * provoke them than it used to). |
|
470 */ |
7234
|
471 while (n2) |
|
472 { |
|
473 n2 >>= 3; |
|
474 nbits += 3; |
|
475 } |
|
476 |
4851
|
477 return ((n >> nbits) + 1) << nbits; |
|
478 } |
|
479 |
|
480 /* Ensure enough temp memory for 'need' array slots is available. |
|
481 * Returns 0 on success and -1 if the memory can't be gotten. |
|
482 */ |
|
483 template <class T> |
|
484 int |
7433
|
485 octave_sort<T>::merge_getmem (octave_idx_type need) |
4851
|
486 { |
|
487 if (need <= ms.alloced) |
|
488 return 0; |
|
489 |
7234
|
490 need = roundupsize (need); |
4851
|
491 /* Don't realloc! That can cost cycles to copy the old data, but |
|
492 * we don't care what's in the block. |
|
493 */ |
7234
|
494 merge_freemem (); |
5760
|
495 ms.a = static_cast <T *> (malloc (need * sizeof (T))); |
7234
|
496 if (ms.a) |
|
497 { |
|
498 ms.alloced = need; |
|
499 return 0; |
|
500 } |
|
501 merge_freemem (); /* reset to sane state */ |
|
502 |
4851
|
503 return -1; |
|
504 } |
|
505 |
7234
|
506 #define MERGE_GETMEM(NEED) ((NEED) <= ms.alloced ? 0 : merge_getmem (NEED)) |
4851
|
507 |
|
508 /* Merge the na elements starting at pa with the nb elements starting at pb |
|
509 * in a stable way, in-place. na and nb must be > 0, and pa + na == pb. |
|
510 * Must also have that *pb < *pa, that pa[na-1] belongs at the end of the |
|
511 * merge, and should have na <= nb. See listsort.txt for more info. |
|
512 * Return 0 if successful, -1 if error. |
|
513 */ |
|
514 template <class T> |
|
515 int |
7433
|
516 octave_sort<T>::merge_lo (T *pa, octave_idx_type na, T *pb, octave_idx_type nb) |
4851
|
517 { |
7433
|
518 octave_idx_type k; |
4851
|
519 T *dest; |
|
520 int result = -1; /* guilty until proved innocent */ |
7433
|
521 octave_idx_type min_gallop = ms.min_gallop; |
4851
|
522 |
7234
|
523 if (MERGE_GETMEM (na) < 0) |
4851
|
524 return -1; |
7480
|
525 std::memcpy (ms.a, pa, na * sizeof (T)); |
4851
|
526 dest = pa; |
|
527 pa = ms.a; |
|
528 |
|
529 *dest++ = *pb++; |
|
530 --nb; |
|
531 if (nb == 0) |
|
532 goto Succeed; |
|
533 if (na == 1) |
|
534 goto CopyB; |
|
535 |
7234
|
536 for (;;) |
|
537 { |
7433
|
538 octave_idx_type acount = 0; /* # of times A won in a row */ |
|
539 octave_idx_type bcount = 0; /* # of times B won in a row */ |
7234
|
540 |
|
541 /* Do the straightforward thing until (if ever) one run |
|
542 * appears to win consistently. |
|
543 */ |
|
544 for (;;) |
|
545 { |
4851
|
546 |
7234
|
547 IFLT (*pb, *pa) |
|
548 { |
|
549 *dest++ = *pb++; |
|
550 ++bcount; |
|
551 acount = 0; |
|
552 --nb; |
|
553 if (nb == 0) |
|
554 goto Succeed; |
|
555 if (bcount >= min_gallop) |
|
556 break; |
|
557 } |
|
558 else |
|
559 { |
|
560 *dest++ = *pa++; |
|
561 ++acount; |
|
562 bcount = 0; |
|
563 --na; |
|
564 if (na == 1) |
|
565 goto CopyB; |
|
566 if (acount >= min_gallop) |
|
567 break; |
|
568 } |
|
569 } |
4851
|
570 |
7234
|
571 /* One run is winning so consistently that galloping may |
|
572 * be a huge win. So try that, and continue galloping until |
|
573 * (if ever) neither run appears to be winning consistently |
|
574 * anymore. |
|
575 */ |
|
576 ++min_gallop; |
|
577 do |
4851
|
578 { |
7234
|
579 min_gallop -= min_gallop > 1; |
|
580 ms.min_gallop = min_gallop; |
|
581 k = gallop_right (*pb, pa, na, 0); |
|
582 acount = k; |
|
583 if (k) |
|
584 { |
|
585 if (k < 0) |
|
586 goto Fail; |
7480
|
587 std::memcpy (dest, pa, k * sizeof (T)); |
7234
|
588 dest += k; |
|
589 pa += k; |
|
590 na -= k; |
|
591 if (na == 1) |
|
592 goto CopyB; |
|
593 /* na==0 is impossible now if the comparison |
|
594 * function is consistent, but we can't assume |
|
595 * that it is. |
|
596 */ |
|
597 if (na == 0) |
|
598 goto Succeed; |
|
599 } |
4851
|
600 *dest++ = *pb++; |
|
601 --nb; |
|
602 if (nb == 0) |
|
603 goto Succeed; |
7234
|
604 |
|
605 k = gallop_left (*pa, pb, nb, 0); |
|
606 bcount = k; |
|
607 if (k) |
|
608 { |
|
609 if (k < 0) |
|
610 goto Fail; |
7480
|
611 std::memmove (dest, pb, k * sizeof (T)); |
7234
|
612 dest += k; |
|
613 pb += k; |
|
614 nb -= k; |
|
615 if (nb == 0) |
|
616 goto Succeed; |
|
617 } |
4851
|
618 *dest++ = *pa++; |
|
619 --na; |
|
620 if (na == 1) |
|
621 goto CopyB; |
|
622 } |
7234
|
623 while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP); |
|
624 |
|
625 ++min_gallop; /* penalize it for leaving galloping mode */ |
|
626 ms.min_gallop = min_gallop; |
4851
|
627 } |
|
628 |
|
629 Succeed: |
|
630 result = 0; |
7234
|
631 |
4851
|
632 Fail: |
|
633 if (na) |
7480
|
634 std::memcpy (dest, pa, na * sizeof (T)); |
4851
|
635 return result; |
7234
|
636 |
4851
|
637 CopyB: |
|
638 /* The last element of pa belongs at the end of the merge. */ |
7480
|
639 std::memmove (dest, pb, nb * sizeof (T)); |
4851
|
640 dest[nb] = *pa; |
7234
|
641 |
4851
|
642 return 0; |
|
643 } |
|
644 |
|
645 /* Merge the na elements starting at pa with the nb elements starting at pb |
|
646 * in a stable way, in-place. na and nb must be > 0, and pa + na == pb. |
|
647 * Must also have that *pb < *pa, that pa[na-1] belongs at the end of the |
|
648 * merge, and should have na >= nb. See listsort.txt for more info. |
|
649 * Return 0 if successful, -1 if error. |
|
650 */ |
|
651 template <class T> |
|
652 int |
7433
|
653 octave_sort<T>::merge_hi (T *pa, octave_idx_type na, T *pb, octave_idx_type nb) |
4851
|
654 { |
7433
|
655 octave_idx_type k; |
4851
|
656 T *dest; |
|
657 int result = -1; /* guilty until proved innocent */ |
|
658 T *basea; |
|
659 T *baseb; |
7433
|
660 octave_idx_type min_gallop = ms.min_gallop; |
4851
|
661 |
7234
|
662 if (MERGE_GETMEM (nb) < 0) |
4851
|
663 return -1; |
|
664 dest = pb + nb - 1; |
7480
|
665 std::memcpy (ms.a, pb, nb * sizeof (T)); |
4851
|
666 basea = pa; |
|
667 baseb = ms.a; |
|
668 pb = ms.a + nb - 1; |
|
669 pa += na - 1; |
|
670 |
|
671 *dest-- = *pa--; |
|
672 --na; |
|
673 if (na == 0) |
|
674 goto Succeed; |
|
675 if (nb == 1) |
|
676 goto CopyA; |
|
677 |
|
678 for (;;) |
|
679 { |
7433
|
680 octave_idx_type acount = 0; /* # of times A won in a row */ |
|
681 octave_idx_type bcount = 0; /* # of times B won in a row */ |
4851
|
682 |
|
683 /* Do the straightforward thing until (if ever) one run |
|
684 * appears to win consistently. |
|
685 */ |
|
686 for (;;) |
|
687 { |
|
688 IFLT (*pb, *pa) |
|
689 { |
|
690 *dest-- = *pa--; |
|
691 ++acount; |
|
692 bcount = 0; |
|
693 --na; |
|
694 if (na == 0) |
|
695 goto Succeed; |
|
696 if (acount >= min_gallop) |
|
697 break; |
|
698 } |
|
699 else |
|
700 { |
|
701 *dest-- = *pb--; |
|
702 ++bcount; |
|
703 acount = 0; |
|
704 --nb; |
|
705 if (nb == 1) |
|
706 goto CopyA; |
|
707 if (bcount >= min_gallop) |
|
708 break; |
|
709 } |
|
710 } |
|
711 |
|
712 /* One run is winning so consistently that galloping may |
|
713 * be a huge win. So try that, and continue galloping until |
|
714 * (if ever) neither run appears to be winning consistently |
|
715 * anymore. |
|
716 */ |
|
717 ++min_gallop; |
|
718 do |
|
719 { |
|
720 min_gallop -= min_gallop > 1; |
|
721 ms.min_gallop = min_gallop; |
7234
|
722 k = gallop_right (*pb, basea, na, na-1); |
4851
|
723 if (k < 0) |
|
724 goto Fail; |
|
725 k = na - k; |
|
726 acount = k; |
|
727 if (k) |
|
728 { |
|
729 dest -= k; |
|
730 pa -= k; |
7480
|
731 std::memmove (dest+1, pa+1, k * sizeof (T)); |
4851
|
732 na -= k; |
|
733 if (na == 0) |
|
734 goto Succeed; |
|
735 } |
|
736 *dest-- = *pb--; |
|
737 --nb; |
|
738 if (nb == 1) |
|
739 goto CopyA; |
|
740 |
7234
|
741 k = gallop_left (*pa, baseb, nb, nb-1); |
4851
|
742 if (k < 0) |
|
743 goto Fail; |
|
744 k = nb - k; |
|
745 bcount = k; |
|
746 if (k) |
|
747 { |
|
748 dest -= k; |
|
749 pb -= k; |
7480
|
750 std::memcpy (dest+1, pb+1, k * sizeof (T)); |
4851
|
751 nb -= k; |
|
752 if (nb == 1) |
|
753 goto CopyA; |
|
754 /* nb==0 is impossible now if the comparison |
|
755 * function is consistent, but we can't assume |
|
756 * that it is. |
|
757 */ |
|
758 if (nb == 0) |
|
759 goto Succeed; |
|
760 } |
|
761 *dest-- = *pa--; |
|
762 --na; |
|
763 if (na == 0) |
|
764 goto Succeed; |
|
765 } while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP); |
|
766 ++min_gallop; /* penalize it for leaving galloping mode */ |
|
767 ms.min_gallop = min_gallop; |
|
768 } |
7234
|
769 |
4851
|
770 Succeed: |
|
771 result = 0; |
7234
|
772 |
4851
|
773 Fail: |
|
774 if (nb) |
7480
|
775 std::memcpy (dest-(nb-1), baseb, nb * sizeof (T)); |
4851
|
776 return result; |
7234
|
777 |
4851
|
778 CopyA: |
|
779 /* The first element of pb belongs at the front of the merge. */ |
|
780 dest -= na; |
|
781 pa -= na; |
7480
|
782 std::memmove (dest+1, pa+1, na * sizeof (T)); |
4851
|
783 *dest = *pb; |
7234
|
784 |
4851
|
785 return 0; |
|
786 } |
|
787 |
|
788 /* Merge the two runs at stack indices i and i+1. |
|
789 * Returns 0 on success, -1 on error. |
|
790 */ |
|
791 template <class T> |
|
792 int |
7433
|
793 octave_sort<T>::merge_at (octave_idx_type i) |
4851
|
794 { |
|
795 T *pa, *pb; |
7433
|
796 octave_idx_type na, nb; |
|
797 octave_idx_type k; |
4851
|
798 |
|
799 pa = ms.pending[i].base; |
|
800 na = ms.pending[i].len; |
|
801 pb = ms.pending[i+1].base; |
|
802 nb = ms.pending[i+1].len; |
|
803 |
|
804 /* Record the length of the combined runs; if i is the 3rd-last |
|
805 * run now, also slide over the last run (which isn't involved |
|
806 * in this merge). The current run i+1 goes away in any case. |
|
807 */ |
|
808 ms.pending[i].len = na + nb; |
|
809 if (i == ms.n - 3) |
|
810 ms.pending[i+1] = ms.pending[i+2]; |
|
811 --ms.n; |
|
812 |
|
813 /* Where does b start in a? Elements in a before that can be |
|
814 * ignored (already in place). |
|
815 */ |
7234
|
816 k = gallop_right (*pb, pa, na, 0); |
4851
|
817 if (k < 0) |
|
818 return -1; |
|
819 pa += k; |
|
820 na -= k; |
|
821 if (na == 0) |
|
822 return 0; |
|
823 |
|
824 /* Where does a end in b? Elements in b after that can be |
|
825 * ignored (already in place). |
|
826 */ |
7234
|
827 nb = gallop_left (pa[na-1], pb, nb, nb-1); |
4851
|
828 if (nb <= 0) |
|
829 return nb; |
|
830 |
|
831 /* Merge what remains of the runs, using a temp array with |
|
832 * min(na, nb) elements. |
|
833 */ |
|
834 if (na <= nb) |
7234
|
835 return merge_lo (pa, na, pb, nb); |
4851
|
836 else |
7234
|
837 return merge_hi (pa, na, pb, nb); |
4851
|
838 } |
|
839 |
|
840 /* Examine the stack of runs waiting to be merged, merging adjacent runs |
|
841 * until the stack invariants are re-established: |
|
842 * |
|
843 * 1. len[-3] > len[-2] + len[-1] |
|
844 * 2. len[-2] > len[-1] |
|
845 * |
|
846 * See listsort.txt for more info. |
|
847 * |
|
848 * Returns 0 on success, -1 on error. |
|
849 */ |
|
850 template <class T> |
|
851 int |
7234
|
852 octave_sort<T>::merge_collapse (void) |
4851
|
853 { |
|
854 struct s_slice *p = ms.pending; |
|
855 |
|
856 while (ms.n > 1) |
|
857 { |
7433
|
858 octave_idx_type n = ms.n - 2; |
4851
|
859 if (n > 0 && p[n-1].len <= p[n].len + p[n+1].len) |
|
860 { |
|
861 if (p[n-1].len < p[n+1].len) |
|
862 --n; |
7234
|
863 if (merge_at (n) < 0) |
4851
|
864 return -1; |
|
865 } |
|
866 else if (p[n].len <= p[n+1].len) |
|
867 { |
7234
|
868 if (merge_at (n) < 0) |
4851
|
869 return -1; |
|
870 } |
|
871 else |
|
872 break; |
|
873 } |
7234
|
874 |
4851
|
875 return 0; |
|
876 } |
|
877 |
|
878 /* Regardless of invariants, merge all runs on the stack until only one |
|
879 * remains. This is used at the end of the mergesort. |
|
880 * |
|
881 * Returns 0 on success, -1 on error. |
|
882 */ |
|
883 template <class T> |
|
884 int |
7234
|
885 octave_sort<T>::merge_force_collapse (void) |
4851
|
886 { |
|
887 struct s_slice *p = ms.pending; |
|
888 |
|
889 while (ms.n > 1) |
|
890 { |
7433
|
891 octave_idx_type n = ms.n - 2; |
4851
|
892 if (n > 0 && p[n-1].len < p[n+1].len) |
|
893 --n; |
7234
|
894 if (merge_at (n) < 0) |
4851
|
895 return -1; |
|
896 } |
7234
|
897 |
4851
|
898 return 0; |
|
899 } |
|
900 |
|
901 /* Compute a good value for the minimum run length; natural runs shorter |
|
902 * than this are boosted artificially via binary insertion. |
|
903 * |
|
904 * If n < 64, return n (it's too small to bother with fancy stuff). |
|
905 * Else if n is an exact power of 2, return 32. |
|
906 * Else return an int k, 32 <= k <= 64, such that n/k is close to, but |
|
907 * strictly less than, an exact power of 2. |
|
908 * |
|
909 * See listsort.txt for more info. |
|
910 */ |
|
911 template <class T> |
7433
|
912 octave_idx_type |
|
913 octave_sort<T>::merge_compute_minrun (octave_idx_type n) |
4851
|
914 { |
7433
|
915 octave_idx_type r = 0; /* becomes 1 if any 1 bits are shifted off */ |
4851
|
916 |
7234
|
917 while (n >= 64) |
|
918 { |
|
919 r |= n & 1; |
|
920 n >>= 1; |
|
921 } |
|
922 |
4851
|
923 return n + r; |
|
924 } |
|
925 |
|
926 template <class T> |
|
927 void |
7433
|
928 octave_sort<T>::sort (T *v, octave_idx_type elements) |
4851
|
929 { |
|
930 /* Re-initialize the Mergestate as this might be the second time called */ |
|
931 ms.n = 0; |
|
932 ms.min_gallop = MIN_GALLOP; |
|
933 |
|
934 if (elements > 1) |
|
935 { |
7433
|
936 octave_idx_type nremaining = elements; |
4851
|
937 T *lo = v; |
|
938 T *hi = v + elements; |
|
939 |
|
940 /* March over the array once, left to right, finding natural runs, |
|
941 * and extending short natural runs to minrun elements. |
|
942 */ |
7433
|
943 octave_idx_type minrun = merge_compute_minrun (nremaining); |
4851
|
944 do |
|
945 { |
|
946 int descending; |
7433
|
947 octave_idx_type n; |
4851
|
948 |
|
949 /* Identify next run. */ |
7234
|
950 n = count_run (lo, hi, &descending); |
4851
|
951 if (n < 0) |
|
952 goto fail; |
|
953 if (descending) |
7234
|
954 reverse_slice (lo, lo + n); |
4851
|
955 /* If short, extend to min(minrun, nremaining). */ |
|
956 if (n < minrun) |
|
957 { |
7433
|
958 const octave_idx_type force = nremaining <= minrun ? nremaining : minrun; |
7234
|
959 binarysort (lo, lo + force, lo + n); |
4851
|
960 n = force; |
|
961 } |
|
962 /* Push run onto pending-runs stack, and maybe merge. */ |
7234
|
963 assert (ms.n < MAX_MERGE_PENDING); |
4851
|
964 ms.pending[ms.n].base = lo; |
|
965 ms.pending[ms.n].len = n; |
|
966 ++ms.n; |
7234
|
967 if (merge_collapse () < 0) |
4851
|
968 goto fail; |
|
969 /* Advance to find next run. */ |
|
970 lo += n; |
|
971 nremaining -= n; |
7234
|
972 } |
|
973 while (nremaining); |
4851
|
974 |
7234
|
975 merge_force_collapse (); |
4851
|
976 } |
|
977 |
|
978 fail: |
|
979 return; |
|
980 } |
|
981 |
|
982 /* |
|
983 ;;; Local Variables: *** |
|
984 ;;; mode: C++ *** |
|
985 ;;; End: *** |
|
986 */ |