Mercurial > hg > octave-nkf
annotate liboctave/oct-sort.cc @ 8678:e2b4c19c455c
redo changeset 4238f2600a17 with fixes to sorting
author | Jaroslav Hajek <highegg@gmail.com> |
---|---|
date | Wed, 04 Feb 2009 16:05:01 +0100 |
parents | 0168d22e6bba |
children | 314be237cd5b |
rev | line source |
---|---|
4851 | 1 /* |
7017 | 2 Copyright (C) 2003, 2004, 2005, 2006, 2007 David Bateman |
8206
0168d22e6bba
fix sorting of non-POD objects
Jaroslav Hajek <highegg@gmail.com>
parents:
7929
diff
changeset
|
3 Copyright (C) 2008 Jaroslav Hajek |
4851 | 4 |
5 This file is part of Octave. | |
6 | |
7 Octave is free software; you can redistribute it and/or modify it | |
8 under the terms of the GNU General Public License as published by the | |
7016 | 9 Free Software Foundation; either version 3 of the License, or (at your |
10 option) any later version. | |
4851 | 11 |
12 Octave is distributed in the hope that it will be useful, but WITHOUT | |
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
15 for more details. | |
16 | |
17 You should have received a copy of the GNU General Public License | |
7016 | 18 along with Octave; see the file COPYING. If not, see |
19 <http://www.gnu.org/licenses/>. | |
4851 | 20 |
21 Code stolen in large part from Python's, listobject.c, which itself had | |
22 no license header. However, thanks to Tim Peters for the parts of the | |
23 code I ripped-off. | |
24 | |
25 As required in the Python license the short description of the changes | |
26 made are | |
27 | |
28 * convert the sorting code in listobject.cc into a generic class, | |
29 replacing PyObject* with the type of the class T. | |
30 | |
8206
0168d22e6bba
fix sorting of non-POD objects
Jaroslav Hajek <highegg@gmail.com>
parents:
7929
diff
changeset
|
31 * replaced usages of malloc, free, memcpy and memmove by standard C++ |
8678
e2b4c19c455c
redo changeset 4238f2600a17 with fixes to sorting
Jaroslav Hajek <highegg@gmail.com>
parents:
8206
diff
changeset
|
32 new [], delete [] and std::copy and std::copy_backward. Note that replacing |
e2b4c19c455c
redo changeset 4238f2600a17 with fixes to sorting
Jaroslav Hajek <highegg@gmail.com>
parents:
8206
diff
changeset
|
33 memmove by std::copy is possible if the destination starts before the source. |
e2b4c19c455c
redo changeset 4238f2600a17 with fixes to sorting
Jaroslav Hajek <highegg@gmail.com>
parents:
8206
diff
changeset
|
34 If not, std::copy_backward needs to be used. |
8206
0168d22e6bba
fix sorting of non-POD objects
Jaroslav Hajek <highegg@gmail.com>
parents:
7929
diff
changeset
|
35 |
0168d22e6bba
fix sorting of non-POD objects
Jaroslav Hajek <highegg@gmail.com>
parents:
7929
diff
changeset
|
36 |
4851 | 37 The Python license is |
38 | |
39 PSF LICENSE AGREEMENT FOR PYTHON 2.3 | |
40 -------------------------------------- | |
41 | |
42 1. This LICENSE AGREEMENT is between the Python Software Foundation | |
43 ("PSF"), and the Individual or Organization ("Licensee") accessing and | |
44 otherwise using Python 2.3 software in source or binary form and its | |
45 associated documentation. | |
46 | |
47 2. Subject to the terms and conditions of this License Agreement, PSF | |
48 hereby grants Licensee a nonexclusive, royalty-free, world-wide | |
49 license to reproduce, analyze, test, perform and/or display publicly, | |
50 prepare derivative works, distribute, and otherwise use Python 2.3 | |
51 alone or in any derivative version, provided, however, that PSF's | |
52 License Agreement and PSF's notice of copyright, i.e., "Copyright (c) | |
53 2001, 2002, 2003 Python Software Foundation; All Rights Reserved" are | |
54 retained in Python 2.3 alone or in any derivative version prepared by | |
55 Licensee. | |
56 | |
57 3. In the event Licensee prepares a derivative work that is based on | |
58 or incorporates Python 2.3 or any part thereof, and wants to make | |
59 the derivative work available to others as provided herein, then | |
60 Licensee hereby agrees to include in any such work a brief summary of | |
61 the changes made to Python 2.3. | |
62 | |
63 4. PSF is making Python 2.3 available to Licensee on an "AS IS" | |
64 basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR | |
65 IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND | |
66 DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS | |
67 FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 2.3 WILL NOT | |
68 INFRINGE ANY THIRD PARTY RIGHTS. | |
69 | |
70 5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON | |
71 2.3 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS | |
72 A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 2.3, | |
73 OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. | |
74 | |
75 6. This License Agreement will automatically terminate upon a material | |
76 breach of its terms and conditions. | |
77 | |
78 7. Nothing in this License Agreement shall be deemed to create any | |
79 relationship of agency, partnership, or joint venture between PSF and | |
80 Licensee. This License Agreement does not grant permission to use PSF | |
81 trademarks or trade name in a trademark sense to endorse or promote | |
82 products or services of Licensee, or any third party. | |
83 | |
84 8. By copying, installing or otherwise using Python 2.3, Licensee | |
85 agrees to be bound by the terms and conditions of this License | |
86 Agreement. | |
87 */ | |
88 | |
89 #ifdef HAVE_CONFIG_H | |
90 #include <config.h> | |
91 #endif | |
92 | |
5883 | 93 #include <cassert> |
8206
0168d22e6bba
fix sorting of non-POD objects
Jaroslav Hajek <highegg@gmail.com>
parents:
7929
diff
changeset
|
94 #include <algorithm> |
7480 | 95 #include <cstring> |
5883 | 96 |
4851 | 97 #include "lo-mappers.h" |
98 #include "quit.h" | |
99 #include "oct-sort.h" | |
100 | |
7433 | 101 #ifndef IFLT |
7234 | 102 #define IFLT(a,b) if (compare ? compare ((a), (b)) : ((a) < (b))) |
7433 | 103 #endif |
4851 | 104 |
105 template <class T> | |
7234 | 106 octave_sort<T>::octave_sort (void) : compare (0) |
4851 | 107 { |
7234 | 108 merge_init (); |
4851 | 109 merge_getmem (1024); |
110 } | |
111 | |
112 template <class T> | |
113 octave_sort<T>::octave_sort (bool (*comp) (T, T)) : compare (comp) | |
114 { | |
4998 | 115 merge_init (); |
4851 | 116 merge_getmem (1024); |
117 } | |
118 | |
119 /* Reverse a slice of a list in place, from lo up to (exclusive) hi. */ | |
120 template <class T> | |
121 void | |
7234 | 122 octave_sort<T>::reverse_slice (T *lo, T *hi) |
4851 | 123 { |
124 --hi; | |
125 while (lo < hi) | |
126 { | |
127 T t = *lo; | |
128 *lo = *hi; | |
129 *hi = t; | |
130 ++lo; | |
131 --hi; | |
132 } | |
133 } | |
134 | |
135 template <class T> | |
136 void | |
137 octave_sort<T>::binarysort (T *lo, T *hi, T *start) | |
138 { | |
6959 | 139 T *l, *p, *r; |
140 T pivot; | |
4851 | 141 |
142 if (lo == start) | |
143 ++start; | |
144 | |
145 for (; start < hi; ++start) | |
146 { | |
147 /* set l to where *start belongs */ | |
148 l = lo; | |
149 r = start; | |
150 pivot = *r; | |
151 /* Invariants: | |
152 * pivot >= all in [lo, l). | |
153 * pivot < all in [r, start). | |
154 * The second is vacuously true at the start. | |
155 */ | |
156 do | |
157 { | |
158 p = l + ((r - l) >> 1); | |
159 IFLT (pivot, *p) | |
160 r = p; | |
161 else | |
162 l = p+1; | |
163 } | |
164 while (l < r); | |
165 /* The invariants still hold, so pivot >= all in [lo, l) and | |
166 pivot < all in [l, start), so pivot belongs at l. Note | |
167 that if there are elements equal to pivot, l points to the | |
168 first slot after them -- that's why this sort is stable. | |
169 Slide over to make room. | |
170 Caution: using memmove is much slower under MSVC 5; | |
171 we're not usually moving many slots. */ | |
172 for (p = start; p > l; --p) | |
173 *p = *(p-1); | |
174 *l = pivot; | |
175 } | |
176 | |
177 return; | |
178 } | |
179 | |
180 /* | |
181 Return the length of the run beginning at lo, in the slice [lo, hi). lo < hi | |
182 is required on entry. "A run" is the longest ascending sequence, with | |
183 | |
184 lo[0] <= lo[1] <= lo[2] <= ... | |
185 | |
186 or the longest descending sequence, with | |
187 | |
188 lo[0] > lo[1] > lo[2] > ... | |
189 | |
7929 | 190 DESCENDING is set to false in the former case, or to true in the latter. |
4851 | 191 For its intended use in a stable mergesort, the strictness of the defn of |
192 "descending" is needed so that the caller can safely reverse a descending | |
193 sequence without violating stability (strict > ensures there are no equal | |
194 elements to get out of order). | |
195 | |
196 Returns -1 in case of error. | |
197 */ | |
198 template <class T> | |
7433 | 199 octave_idx_type |
7929 | 200 octave_sort<T>::count_run (T *lo, T *hi, bool& descending) |
4851 | 201 { |
7433 | 202 octave_idx_type n; |
4851 | 203 |
7929 | 204 descending = false; |
4851 | 205 ++lo; |
206 if (lo == hi) | |
207 return 1; | |
208 | |
209 n = 2; | |
210 | |
211 IFLT (*lo, *(lo-1)) | |
212 { | |
7929 | 213 descending = true; |
4851 | 214 for (lo = lo+1; lo < hi; ++lo, ++n) |
215 { | |
216 IFLT (*lo, *(lo-1)) | |
217 ; | |
218 else | |
219 break; | |
220 } | |
221 } | |
222 else | |
223 { | |
224 for (lo = lo+1; lo < hi; ++lo, ++n) | |
225 { | |
226 IFLT (*lo, *(lo-1)) | |
227 break; | |
228 } | |
229 } | |
230 | |
231 return n; | |
232 } | |
233 | |
234 /* | |
235 Locate the proper position of key in a sorted vector; if the vector contains | |
236 an element equal to key, return the position immediately to the left of | |
237 the leftmost equal element. [gallop_right() does the same except returns | |
238 the position to the right of the rightmost equal element (if any).] | |
239 | |
240 "a" is a sorted vector with n elements, starting at a[0]. n must be > 0. | |
241 | |
242 "hint" is an index at which to begin the search, 0 <= hint < n. The closer | |
243 hint is to the final result, the faster this runs. | |
244 | |
245 The return value is the int k in 0..n such that | |
246 | |
247 a[k-1] < key <= a[k] | |
248 | |
249 pretending that *(a-1) is minus infinity and a[n] is plus infinity. IOW, | |
250 key belongs at index k; or, IOW, the first k elements of a should precede | |
251 key, and the last n-k should follow key. | |
252 | |
253 Returns -1 on error. See listsort.txt for info on the method. | |
254 */ | |
255 template <class T> | |
7433 | 256 octave_idx_type |
257 octave_sort<T>::gallop_left (T key, T *a, octave_idx_type n, octave_idx_type hint) | |
4851 | 258 { |
7433 | 259 octave_idx_type ofs; |
260 octave_idx_type lastofs; | |
261 octave_idx_type k; | |
4851 | 262 |
263 a += hint; | |
264 lastofs = 0; | |
265 ofs = 1; | |
266 IFLT (*a, key) | |
267 { | |
268 /* a[hint] < key -- gallop right, until | |
269 * a[hint + lastofs] < key <= a[hint + ofs] | |
270 */ | |
7433 | 271 const octave_idx_type maxofs = n - hint; /* &a[n-1] is highest */ |
4851 | 272 while (ofs < maxofs) |
273 { | |
274 IFLT (a[ofs], key) | |
275 { | |
276 lastofs = ofs; | |
277 ofs = (ofs << 1) + 1; | |
278 if (ofs <= 0) /* int overflow */ | |
279 ofs = maxofs; | |
280 } | |
281 else /* key <= a[hint + ofs] */ | |
282 break; | |
283 } | |
284 if (ofs > maxofs) | |
285 ofs = maxofs; | |
286 /* Translate back to offsets relative to &a[0]. */ | |
287 lastofs += hint; | |
288 ofs += hint; | |
289 } | |
290 else | |
291 { | |
292 /* key <= a[hint] -- gallop left, until | |
293 * a[hint - ofs] < key <= a[hint - lastofs] | |
294 */ | |
7433 | 295 const octave_idx_type maxofs = hint + 1; /* &a[0] is lowest */ |
4851 | 296 while (ofs < maxofs) |
297 { | |
298 IFLT (*(a-ofs), key) | |
299 break; | |
300 /* key <= a[hint - ofs] */ | |
301 lastofs = ofs; | |
302 ofs = (ofs << 1) + 1; | |
303 if (ofs <= 0) /* int overflow */ | |
304 ofs = maxofs; | |
305 } | |
306 if (ofs > maxofs) | |
307 ofs = maxofs; | |
308 /* Translate back to positive offsets relative to &a[0]. */ | |
309 k = lastofs; | |
310 lastofs = hint - ofs; | |
311 ofs = hint - k; | |
312 } | |
313 a -= hint; | |
314 | |
315 /* Now a[lastofs] < key <= a[ofs], so key belongs somewhere to the | |
316 * right of lastofs but no farther right than ofs. Do a binary | |
317 * search, with invariant a[lastofs-1] < key <= a[ofs]. | |
318 */ | |
319 ++lastofs; | |
320 while (lastofs < ofs) | |
321 { | |
7433 | 322 octave_idx_type m = lastofs + ((ofs - lastofs) >> 1); |
4851 | 323 |
324 IFLT (a[m], key) | |
325 lastofs = m+1; /* a[m] < key */ | |
326 else | |
327 ofs = m; /* key <= a[m] */ | |
328 } | |
7234 | 329 |
4851 | 330 return ofs; |
331 } | |
332 | |
333 /* | |
334 Exactly like gallop_left(), except that if key already exists in a[0:n], | |
335 finds the position immediately to the right of the rightmost equal value. | |
336 | |
337 The return value is the int k in 0..n such that | |
338 | |
339 a[k-1] <= key < a[k] | |
340 | |
341 or -1 if error. | |
342 | |
343 The code duplication is massive, but this is enough different given that | |
344 we're sticking to "<" comparisons that it's much harder to follow if | |
345 written as one routine with yet another "left or right?" flag. | |
346 */ | |
347 template <class T> | |
7433 | 348 octave_idx_type |
349 octave_sort<T>::gallop_right (T key, T *a, octave_idx_type n, octave_idx_type hint) | |
4851 | 350 { |
7433 | 351 octave_idx_type ofs; |
352 octave_idx_type lastofs; | |
353 octave_idx_type k; | |
4851 | 354 |
355 a += hint; | |
356 lastofs = 0; | |
357 ofs = 1; | |
358 IFLT (key, *a) | |
359 { | |
360 /* key < a[hint] -- gallop left, until | |
361 * a[hint - ofs] <= key < a[hint - lastofs] | |
362 */ | |
7433 | 363 const octave_idx_type maxofs = hint + 1; /* &a[0] is lowest */ |
4851 | 364 while (ofs < maxofs) |
365 { | |
366 IFLT (key, *(a-ofs)) | |
367 { | |
368 lastofs = ofs; | |
369 ofs = (ofs << 1) + 1; | |
370 if (ofs <= 0) /* int overflow */ | |
371 ofs = maxofs; | |
372 } | |
373 else /* a[hint - ofs] <= key */ | |
374 break; | |
375 } | |
376 if (ofs > maxofs) | |
377 ofs = maxofs; | |
378 /* Translate back to positive offsets relative to &a[0]. */ | |
379 k = lastofs; | |
380 lastofs = hint - ofs; | |
381 ofs = hint - k; | |
382 } | |
383 else | |
384 { | |
385 /* a[hint] <= key -- gallop right, until | |
386 * a[hint + lastofs] <= key < a[hint + ofs] | |
387 */ | |
7433 | 388 const octave_idx_type maxofs = n - hint; /* &a[n-1] is highest */ |
4851 | 389 while (ofs < maxofs) |
390 { | |
391 IFLT (key, a[ofs]) | |
392 break; | |
393 /* a[hint + ofs] <= key */ | |
394 lastofs = ofs; | |
395 ofs = (ofs << 1) + 1; | |
396 if (ofs <= 0) /* int overflow */ | |
397 ofs = maxofs; | |
398 } | |
399 if (ofs > maxofs) | |
400 ofs = maxofs; | |
401 /* Translate back to offsets relative to &a[0]. */ | |
402 lastofs += hint; | |
403 ofs += hint; | |
404 } | |
405 a -= hint; | |
406 | |
407 /* Now a[lastofs] <= key < a[ofs], so key belongs somewhere to the | |
408 * right of lastofs but no farther right than ofs. Do a binary | |
409 * search, with invariant a[lastofs-1] <= key < a[ofs]. | |
410 */ | |
411 ++lastofs; | |
412 while (lastofs < ofs) | |
413 { | |
7433 | 414 octave_idx_type m = lastofs + ((ofs - lastofs) >> 1); |
4851 | 415 |
416 IFLT (key, a[m]) | |
417 ofs = m; /* key < a[m] */ | |
418 else | |
419 lastofs = m+1; /* a[m] <= key */ | |
420 } | |
7234 | 421 |
4851 | 422 return ofs; |
423 } | |
424 | |
425 /* Conceptually a MergeState's constructor. */ | |
426 template <class T> | |
427 void | |
7234 | 428 octave_sort<T>::merge_init (void) |
4851 | 429 { |
7234 | 430 ms.a = 0; |
4851 | 431 ms.alloced = 0; |
432 ms.n = 0; | |
433 ms.min_gallop = MIN_GALLOP; | |
434 } | |
435 | |
436 /* Free all the temp memory owned by the MergeState. This must be called | |
437 * when you're done with a MergeState, and may be called before then if | |
438 * you want to free the temp memory early. | |
439 */ | |
440 template <class T> | |
441 void | |
7234 | 442 octave_sort<T>::merge_freemem (void) |
4851 | 443 { |
8206
0168d22e6bba
fix sorting of non-POD objects
Jaroslav Hajek <highegg@gmail.com>
parents:
7929
diff
changeset
|
444 delete [] ms.a; |
4851 | 445 ms.alloced = 0; |
7234 | 446 ms.a = 0; |
4851 | 447 } |
448 | |
7433 | 449 static inline octave_idx_type |
450 roundupsize (octave_idx_type n) | |
4851 | 451 { |
452 unsigned int nbits = 3; | |
7433 | 453 octave_idx_type n2 = static_cast<octave_idx_type> (n) >> 8; |
4851 | 454 |
455 /* Round up: | |
456 * If n < 256, to a multiple of 8. | |
457 * If n < 2048, to a multiple of 64. | |
458 * If n < 16384, to a multiple of 512. | |
459 * If n < 131072, to a multiple of 4096. | |
460 * If n < 1048576, to a multiple of 32768. | |
461 * If n < 8388608, to a multiple of 262144. | |
462 * If n < 67108864, to a multiple of 2097152. | |
463 * If n < 536870912, to a multiple of 16777216. | |
464 * ... | |
465 * If n < 2**(5+3*i), to a multiple of 2**(3*i). | |
466 * | |
467 * This over-allocates proportional to the list size, making room | |
468 * for additional growth. The over-allocation is mild, but is | |
469 * enough to give linear-time amortized behavior over a long | |
470 * sequence of appends() in the presence of a poorly-performing | |
471 * system realloc() (which is a reality, e.g., across all flavors | |
472 * of Windows, with Win9x behavior being particularly bad -- and | |
473 * we've still got address space fragmentation problems on Win9x | |
474 * even with this scheme, although it requires much longer lists to | |
475 * provoke them than it used to). | |
476 */ | |
7234 | 477 while (n2) |
478 { | |
479 n2 >>= 3; | |
480 nbits += 3; | |
481 } | |
482 | |
4851 | 483 return ((n >> nbits) + 1) << nbits; |
484 } | |
485 | |
486 /* Ensure enough temp memory for 'need' array slots is available. | |
487 * Returns 0 on success and -1 if the memory can't be gotten. | |
488 */ | |
489 template <class T> | |
490 int | |
7433 | 491 octave_sort<T>::merge_getmem (octave_idx_type need) |
4851 | 492 { |
493 if (need <= ms.alloced) | |
494 return 0; | |
495 | |
7234 | 496 need = roundupsize (need); |
4851 | 497 /* Don't realloc! That can cost cycles to copy the old data, but |
498 * we don't care what's in the block. | |
499 */ | |
7234 | 500 merge_freemem (); |
8206
0168d22e6bba
fix sorting of non-POD objects
Jaroslav Hajek <highegg@gmail.com>
parents:
7929
diff
changeset
|
501 ms.a = new T[need]; |
7234 | 502 if (ms.a) |
503 { | |
504 ms.alloced = need; | |
505 return 0; | |
506 } | |
507 merge_freemem (); /* reset to sane state */ | |
508 | |
4851 | 509 return -1; |
510 } | |
511 | |
7234 | 512 #define MERGE_GETMEM(NEED) ((NEED) <= ms.alloced ? 0 : merge_getmem (NEED)) |
4851 | 513 |
514 /* Merge the na elements starting at pa with the nb elements starting at pb | |
515 * in a stable way, in-place. na and nb must be > 0, and pa + na == pb. | |
516 * Must also have that *pb < *pa, that pa[na-1] belongs at the end of the | |
517 * merge, and should have na <= nb. See listsort.txt for more info. | |
518 * Return 0 if successful, -1 if error. | |
519 */ | |
520 template <class T> | |
521 int | |
7433 | 522 octave_sort<T>::merge_lo (T *pa, octave_idx_type na, T *pb, octave_idx_type nb) |
4851 | 523 { |
7433 | 524 octave_idx_type k; |
4851 | 525 T *dest; |
526 int result = -1; /* guilty until proved innocent */ | |
7433 | 527 octave_idx_type min_gallop = ms.min_gallop; |
4851 | 528 |
7234 | 529 if (MERGE_GETMEM (na) < 0) |
4851 | 530 return -1; |
8206
0168d22e6bba
fix sorting of non-POD objects
Jaroslav Hajek <highegg@gmail.com>
parents:
7929
diff
changeset
|
531 std::copy (pa, pa + na, ms.a); |
4851 | 532 dest = pa; |
533 pa = ms.a; | |
534 | |
535 *dest++ = *pb++; | |
536 --nb; | |
537 if (nb == 0) | |
538 goto Succeed; | |
539 if (na == 1) | |
540 goto CopyB; | |
541 | |
7234 | 542 for (;;) |
543 { | |
7433 | 544 octave_idx_type acount = 0; /* # of times A won in a row */ |
545 octave_idx_type bcount = 0; /* # of times B won in a row */ | |
7234 | 546 |
547 /* Do the straightforward thing until (if ever) one run | |
548 * appears to win consistently. | |
549 */ | |
550 for (;;) | |
551 { | |
4851 | 552 |
7234 | 553 IFLT (*pb, *pa) |
554 { | |
555 *dest++ = *pb++; | |
556 ++bcount; | |
557 acount = 0; | |
558 --nb; | |
559 if (nb == 0) | |
560 goto Succeed; | |
561 if (bcount >= min_gallop) | |
562 break; | |
563 } | |
564 else | |
565 { | |
566 *dest++ = *pa++; | |
567 ++acount; | |
568 bcount = 0; | |
569 --na; | |
570 if (na == 1) | |
571 goto CopyB; | |
572 if (acount >= min_gallop) | |
573 break; | |
574 } | |
575 } | |
4851 | 576 |
7234 | 577 /* One run is winning so consistently that galloping may |
578 * be a huge win. So try that, and continue galloping until | |
579 * (if ever) neither run appears to be winning consistently | |
580 * anymore. | |
581 */ | |
582 ++min_gallop; | |
583 do | |
4851 | 584 { |
7234 | 585 min_gallop -= min_gallop > 1; |
586 ms.min_gallop = min_gallop; | |
587 k = gallop_right (*pb, pa, na, 0); | |
588 acount = k; | |
589 if (k) | |
590 { | |
591 if (k < 0) | |
592 goto Fail; | |
8206
0168d22e6bba
fix sorting of non-POD objects
Jaroslav Hajek <highegg@gmail.com>
parents:
7929
diff
changeset
|
593 std::copy (pa, pa + k, dest); |
7234 | 594 dest += k; |
595 pa += k; | |
596 na -= k; | |
597 if (na == 1) | |
598 goto CopyB; | |
599 /* na==0 is impossible now if the comparison | |
600 * function is consistent, but we can't assume | |
601 * that it is. | |
602 */ | |
603 if (na == 0) | |
604 goto Succeed; | |
605 } | |
4851 | 606 *dest++ = *pb++; |
607 --nb; | |
608 if (nb == 0) | |
609 goto Succeed; | |
7234 | 610 |
611 k = gallop_left (*pa, pb, nb, 0); | |
612 bcount = k; | |
613 if (k) | |
614 { | |
615 if (k < 0) | |
616 goto Fail; | |
8206
0168d22e6bba
fix sorting of non-POD objects
Jaroslav Hajek <highegg@gmail.com>
parents:
7929
diff
changeset
|
617 std::copy (pb, pb + k, dest); |
7234 | 618 dest += k; |
619 pb += k; | |
620 nb -= k; | |
621 if (nb == 0) | |
622 goto Succeed; | |
623 } | |
4851 | 624 *dest++ = *pa++; |
625 --na; | |
626 if (na == 1) | |
627 goto CopyB; | |
628 } | |
7234 | 629 while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP); |
630 | |
631 ++min_gallop; /* penalize it for leaving galloping mode */ | |
632 ms.min_gallop = min_gallop; | |
4851 | 633 } |
634 | |
635 Succeed: | |
636 result = 0; | |
7234 | 637 |
4851 | 638 Fail: |
639 if (na) | |
8206
0168d22e6bba
fix sorting of non-POD objects
Jaroslav Hajek <highegg@gmail.com>
parents:
7929
diff
changeset
|
640 std::copy (pa, pa + na, dest); |
4851 | 641 return result; |
7234 | 642 |
4851 | 643 CopyB: |
644 /* The last element of pa belongs at the end of the merge. */ | |
8206
0168d22e6bba
fix sorting of non-POD objects
Jaroslav Hajek <highegg@gmail.com>
parents:
7929
diff
changeset
|
645 std::copy (pb, pb + nb, dest); |
4851 | 646 dest[nb] = *pa; |
7234 | 647 |
4851 | 648 return 0; |
649 } | |
650 | |
651 /* Merge the na elements starting at pa with the nb elements starting at pb | |
652 * in a stable way, in-place. na and nb must be > 0, and pa + na == pb. | |
653 * Must also have that *pb < *pa, that pa[na-1] belongs at the end of the | |
654 * merge, and should have na >= nb. See listsort.txt for more info. | |
655 * Return 0 if successful, -1 if error. | |
656 */ | |
657 template <class T> | |
658 int | |
7433 | 659 octave_sort<T>::merge_hi (T *pa, octave_idx_type na, T *pb, octave_idx_type nb) |
4851 | 660 { |
7433 | 661 octave_idx_type k; |
4851 | 662 T *dest; |
663 int result = -1; /* guilty until proved innocent */ | |
664 T *basea; | |
665 T *baseb; | |
7433 | 666 octave_idx_type min_gallop = ms.min_gallop; |
4851 | 667 |
7234 | 668 if (MERGE_GETMEM (nb) < 0) |
4851 | 669 return -1; |
670 dest = pb + nb - 1; | |
8206
0168d22e6bba
fix sorting of non-POD objects
Jaroslav Hajek <highegg@gmail.com>
parents:
7929
diff
changeset
|
671 std::copy (pb, pb + nb, ms.a); |
4851 | 672 basea = pa; |
673 baseb = ms.a; | |
674 pb = ms.a + nb - 1; | |
675 pa += na - 1; | |
676 | |
677 *dest-- = *pa--; | |
678 --na; | |
679 if (na == 0) | |
680 goto Succeed; | |
681 if (nb == 1) | |
682 goto CopyA; | |
683 | |
684 for (;;) | |
685 { | |
7433 | 686 octave_idx_type acount = 0; /* # of times A won in a row */ |
687 octave_idx_type bcount = 0; /* # of times B won in a row */ | |
4851 | 688 |
689 /* Do the straightforward thing until (if ever) one run | |
690 * appears to win consistently. | |
691 */ | |
692 for (;;) | |
693 { | |
694 IFLT (*pb, *pa) | |
695 { | |
696 *dest-- = *pa--; | |
697 ++acount; | |
698 bcount = 0; | |
699 --na; | |
700 if (na == 0) | |
701 goto Succeed; | |
702 if (acount >= min_gallop) | |
703 break; | |
704 } | |
705 else | |
706 { | |
707 *dest-- = *pb--; | |
708 ++bcount; | |
709 acount = 0; | |
710 --nb; | |
711 if (nb == 1) | |
712 goto CopyA; | |
713 if (bcount >= min_gallop) | |
714 break; | |
715 } | |
716 } | |
717 | |
718 /* One run is winning so consistently that galloping may | |
719 * be a huge win. So try that, and continue galloping until | |
720 * (if ever) neither run appears to be winning consistently | |
721 * anymore. | |
722 */ | |
723 ++min_gallop; | |
724 do | |
725 { | |
726 min_gallop -= min_gallop > 1; | |
727 ms.min_gallop = min_gallop; | |
7234 | 728 k = gallop_right (*pb, basea, na, na-1); |
4851 | 729 if (k < 0) |
730 goto Fail; | |
731 k = na - k; | |
732 acount = k; | |
733 if (k) | |
734 { | |
735 dest -= k; | |
736 pa -= k; | |
8678
e2b4c19c455c
redo changeset 4238f2600a17 with fixes to sorting
Jaroslav Hajek <highegg@gmail.com>
parents:
8206
diff
changeset
|
737 std::copy_backward (pa+1, pa+1 + k, dest+1 + k); |
4851 | 738 na -= k; |
739 if (na == 0) | |
740 goto Succeed; | |
741 } | |
742 *dest-- = *pb--; | |
743 --nb; | |
744 if (nb == 1) | |
745 goto CopyA; | |
746 | |
7234 | 747 k = gallop_left (*pa, baseb, nb, nb-1); |
4851 | 748 if (k < 0) |
749 goto Fail; | |
750 k = nb - k; | |
751 bcount = k; | |
752 if (k) | |
753 { | |
754 dest -= k; | |
755 pb -= k; | |
8206
0168d22e6bba
fix sorting of non-POD objects
Jaroslav Hajek <highegg@gmail.com>
parents:
7929
diff
changeset
|
756 std::copy (pb+1, pb+1 + k, dest+1); |
4851 | 757 nb -= k; |
758 if (nb == 1) | |
759 goto CopyA; | |
760 /* nb==0 is impossible now if the comparison | |
761 * function is consistent, but we can't assume | |
762 * that it is. | |
763 */ | |
764 if (nb == 0) | |
765 goto Succeed; | |
766 } | |
767 *dest-- = *pa--; | |
768 --na; | |
769 if (na == 0) | |
770 goto Succeed; | |
771 } while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP); | |
772 ++min_gallop; /* penalize it for leaving galloping mode */ | |
773 ms.min_gallop = min_gallop; | |
774 } | |
7234 | 775 |
4851 | 776 Succeed: |
777 result = 0; | |
7234 | 778 |
4851 | 779 Fail: |
780 if (nb) | |
8206
0168d22e6bba
fix sorting of non-POD objects
Jaroslav Hajek <highegg@gmail.com>
parents:
7929
diff
changeset
|
781 std::copy (baseb, baseb + nb, dest-(nb-1)); |
4851 | 782 return result; |
7234 | 783 |
4851 | 784 CopyA: |
785 /* The first element of pb belongs at the front of the merge. */ | |
786 dest -= na; | |
787 pa -= na; | |
8678
e2b4c19c455c
redo changeset 4238f2600a17 with fixes to sorting
Jaroslav Hajek <highegg@gmail.com>
parents:
8206
diff
changeset
|
788 std::copy_backward (pa+1, pa+1 + na, dest+1 + na); |
4851 | 789 *dest = *pb; |
7234 | 790 |
4851 | 791 return 0; |
792 } | |
793 | |
794 /* Merge the two runs at stack indices i and i+1. | |
795 * Returns 0 on success, -1 on error. | |
796 */ | |
797 template <class T> | |
798 int | |
7433 | 799 octave_sort<T>::merge_at (octave_idx_type i) |
4851 | 800 { |
801 T *pa, *pb; | |
7433 | 802 octave_idx_type na, nb; |
803 octave_idx_type k; | |
4851 | 804 |
805 pa = ms.pending[i].base; | |
806 na = ms.pending[i].len; | |
807 pb = ms.pending[i+1].base; | |
808 nb = ms.pending[i+1].len; | |
809 | |
810 /* Record the length of the combined runs; if i is the 3rd-last | |
811 * run now, also slide over the last run (which isn't involved | |
812 * in this merge). The current run i+1 goes away in any case. | |
813 */ | |
814 ms.pending[i].len = na + nb; | |
815 if (i == ms.n - 3) | |
816 ms.pending[i+1] = ms.pending[i+2]; | |
817 --ms.n; | |
818 | |
819 /* Where does b start in a? Elements in a before that can be | |
820 * ignored (already in place). | |
821 */ | |
7234 | 822 k = gallop_right (*pb, pa, na, 0); |
4851 | 823 if (k < 0) |
824 return -1; | |
825 pa += k; | |
826 na -= k; | |
827 if (na == 0) | |
828 return 0; | |
829 | |
830 /* Where does a end in b? Elements in b after that can be | |
831 * ignored (already in place). | |
832 */ | |
7234 | 833 nb = gallop_left (pa[na-1], pb, nb, nb-1); |
4851 | 834 if (nb <= 0) |
835 return nb; | |
836 | |
837 /* Merge what remains of the runs, using a temp array with | |
838 * min(na, nb) elements. | |
839 */ | |
840 if (na <= nb) | |
7234 | 841 return merge_lo (pa, na, pb, nb); |
4851 | 842 else |
7234 | 843 return merge_hi (pa, na, pb, nb); |
4851 | 844 } |
845 | |
846 /* Examine the stack of runs waiting to be merged, merging adjacent runs | |
847 * until the stack invariants are re-established: | |
848 * | |
849 * 1. len[-3] > len[-2] + len[-1] | |
850 * 2. len[-2] > len[-1] | |
851 * | |
852 * See listsort.txt for more info. | |
853 * | |
854 * Returns 0 on success, -1 on error. | |
855 */ | |
856 template <class T> | |
857 int | |
7234 | 858 octave_sort<T>::merge_collapse (void) |
4851 | 859 { |
860 struct s_slice *p = ms.pending; | |
861 | |
862 while (ms.n > 1) | |
863 { | |
7433 | 864 octave_idx_type n = ms.n - 2; |
4851 | 865 if (n > 0 && p[n-1].len <= p[n].len + p[n+1].len) |
866 { | |
867 if (p[n-1].len < p[n+1].len) | |
868 --n; | |
7234 | 869 if (merge_at (n) < 0) |
4851 | 870 return -1; |
871 } | |
872 else if (p[n].len <= p[n+1].len) | |
873 { | |
7234 | 874 if (merge_at (n) < 0) |
4851 | 875 return -1; |
876 } | |
877 else | |
878 break; | |
879 } | |
7234 | 880 |
4851 | 881 return 0; |
882 } | |
883 | |
884 /* Regardless of invariants, merge all runs on the stack until only one | |
885 * remains. This is used at the end of the mergesort. | |
886 * | |
887 * Returns 0 on success, -1 on error. | |
888 */ | |
889 template <class T> | |
890 int | |
7234 | 891 octave_sort<T>::merge_force_collapse (void) |
4851 | 892 { |
893 struct s_slice *p = ms.pending; | |
894 | |
895 while (ms.n > 1) | |
896 { | |
7433 | 897 octave_idx_type n = ms.n - 2; |
4851 | 898 if (n > 0 && p[n-1].len < p[n+1].len) |
899 --n; | |
7234 | 900 if (merge_at (n) < 0) |
4851 | 901 return -1; |
902 } | |
7234 | 903 |
4851 | 904 return 0; |
905 } | |
906 | |
907 /* Compute a good value for the minimum run length; natural runs shorter | |
908 * than this are boosted artificially via binary insertion. | |
909 * | |
910 * If n < 64, return n (it's too small to bother with fancy stuff). | |
911 * Else if n is an exact power of 2, return 32. | |
912 * Else return an int k, 32 <= k <= 64, such that n/k is close to, but | |
913 * strictly less than, an exact power of 2. | |
914 * | |
915 * See listsort.txt for more info. | |
916 */ | |
917 template <class T> | |
7433 | 918 octave_idx_type |
919 octave_sort<T>::merge_compute_minrun (octave_idx_type n) | |
4851 | 920 { |
7433 | 921 octave_idx_type r = 0; /* becomes 1 if any 1 bits are shifted off */ |
4851 | 922 |
7234 | 923 while (n >= 64) |
924 { | |
925 r |= n & 1; | |
926 n >>= 1; | |
927 } | |
928 | |
4851 | 929 return n + r; |
930 } | |
931 | |
932 template <class T> | |
933 void | |
7433 | 934 octave_sort<T>::sort (T *v, octave_idx_type elements) |
4851 | 935 { |
936 /* Re-initialize the Mergestate as this might be the second time called */ | |
937 ms.n = 0; | |
938 ms.min_gallop = MIN_GALLOP; | |
939 | |
940 if (elements > 1) | |
941 { | |
7433 | 942 octave_idx_type nremaining = elements; |
4851 | 943 T *lo = v; |
944 T *hi = v + elements; | |
945 | |
946 /* March over the array once, left to right, finding natural runs, | |
947 * and extending short natural runs to minrun elements. | |
948 */ | |
7433 | 949 octave_idx_type minrun = merge_compute_minrun (nremaining); |
4851 | 950 do |
951 { | |
7929 | 952 bool descending; |
7433 | 953 octave_idx_type n; |
4851 | 954 |
955 /* Identify next run. */ | |
7929 | 956 n = count_run (lo, hi, descending); |
4851 | 957 if (n < 0) |
958 goto fail; | |
959 if (descending) | |
7234 | 960 reverse_slice (lo, lo + n); |
4851 | 961 /* If short, extend to min(minrun, nremaining). */ |
962 if (n < minrun) | |
963 { | |
7433 | 964 const octave_idx_type force = nremaining <= minrun ? nremaining : minrun; |
7234 | 965 binarysort (lo, lo + force, lo + n); |
4851 | 966 n = force; |
967 } | |
968 /* Push run onto pending-runs stack, and maybe merge. */ | |
7234 | 969 assert (ms.n < MAX_MERGE_PENDING); |
4851 | 970 ms.pending[ms.n].base = lo; |
971 ms.pending[ms.n].len = n; | |
972 ++ms.n; | |
7234 | 973 if (merge_collapse () < 0) |
4851 | 974 goto fail; |
975 /* Advance to find next run. */ | |
976 lo += n; | |
977 nremaining -= n; | |
7234 | 978 } |
979 while (nremaining); | |
4851 | 980 |
7234 | 981 merge_force_collapse (); |
4851 | 982 } |
983 | |
984 fail: | |
985 return; | |
986 } | |
987 | |
988 /* | |
989 ;;; Local Variables: *** | |
990 ;;; mode: C++ *** | |
991 ;;; End: *** | |
992 */ |