# HG changeset patch # User jwe # Date 1081271657 0 # Node ID 047ff938b0d9c7bcc9395e94844946400b1929e4 # Parent 8cc4818a0de0a5a787496467d7726c2a12ad6ca9 [project @ 2004-04-06 17:12:14 by jwe] diff --git a/liboctave/oct-sort.cc b/liboctave/oct-sort.cc new file mode 100644 --- /dev/null +++ b/liboctave/oct-sort.cc @@ -0,0 +1,958 @@ +/* +Copyright (C) 2003 David Bateman + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, write to the Free +Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +Code stolen in large part from Python's, listobject.c, which itself had +no license header. However, thanks to Tim Peters for the parts of the +code I ripped-off. + +As required in the Python license the short description of the changes +made are + +* convert the sorting code in listobject.cc into a generic class, + replacing PyObject* with the type of the class T. + +The Python license is + + PSF LICENSE AGREEMENT FOR PYTHON 2.3 + -------------------------------------- + + 1. This LICENSE AGREEMENT is between the Python Software Foundation + ("PSF"), and the Individual or Organization ("Licensee") accessing and + otherwise using Python 2.3 software in source or binary form and its + associated documentation. + + 2. Subject to the terms and conditions of this License Agreement, PSF + hereby grants Licensee a nonexclusive, royalty-free, world-wide + license to reproduce, analyze, test, perform and/or display publicly, + prepare derivative works, distribute, and otherwise use Python 2.3 + alone or in any derivative version, provided, however, that PSF's + License Agreement and PSF's notice of copyright, i.e., "Copyright (c) + 2001, 2002, 2003 Python Software Foundation; All Rights Reserved" are + retained in Python 2.3 alone or in any derivative version prepared by + Licensee. + + 3. In the event Licensee prepares a derivative work that is based on + or incorporates Python 2.3 or any part thereof, and wants to make + the derivative work available to others as provided herein, then + Licensee hereby agrees to include in any such work a brief summary of + the changes made to Python 2.3. + + 4. PSF is making Python 2.3 available to Licensee on an "AS IS" + basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR + IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND + DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS + FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 2.3 WILL NOT + INFRINGE ANY THIRD PARTY RIGHTS. + + 5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON + 2.3 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS + A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 2.3, + OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. + + 6. This License Agreement will automatically terminate upon a material + breach of its terms and conditions. + + 7. Nothing in this License Agreement shall be deemed to create any + relationship of agency, partnership, or joint venture between PSF and + Licensee. This License Agreement does not grant permission to use PSF + trademarks or trade name in a trademark sense to endorse or promote + products or services of Licensee, or any third party. + + 8. By copying, installing or otherwise using Python 2.3, Licensee + agrees to be bound by the terms and conditions of this License + Agreement. +*/ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include "oct-obj.h" +#include "lo-mappers.h" +#include "quit.h" +#include "oct-sort.h" + +#define IFLT(a,b) if (compare == NULL ? ((a) < (b)) : compare ((a), (b))) + +template +octave_sort::octave_sort (void) : compare (NULL) +{ + merge_init ( ); + merge_getmem (1024); +} + +template +octave_sort::octave_sort (bool (*comp) (T, T)) : compare (comp) +{ + merge_init ( ); + merge_getmem (1024); +} + +/* Reverse a slice of a list in place, from lo up to (exclusive) hi. */ +template +void +octave_sort::reverse_slice(T *lo, T *hi) +{ + --hi; + while (lo < hi) + { + T t = *lo; + *lo = *hi; + *hi = t; + ++lo; + --hi; + } +} + +template +void +octave_sort::binarysort (T *lo, T *hi, T *start) +{ + register T *l, *p, *r; + register T pivot; + + if (lo == start) + ++start; + + for (; start < hi; ++start) + { + /* set l to where *start belongs */ + l = lo; + r = start; + pivot = *r; + /* Invariants: + * pivot >= all in [lo, l). + * pivot < all in [r, start). + * The second is vacuously true at the start. + */ + do + { + p = l + ((r - l) >> 1); + IFLT (pivot, *p) + r = p; + else + l = p+1; + } + while (l < r); + /* The invariants still hold, so pivot >= all in [lo, l) and + pivot < all in [l, start), so pivot belongs at l. Note + that if there are elements equal to pivot, l points to the + first slot after them -- that's why this sort is stable. + Slide over to make room. + Caution: using memmove is much slower under MSVC 5; + we're not usually moving many slots. */ + for (p = start; p > l; --p) + *p = *(p-1); + *l = pivot; + } + + return; +} + +/* +Return the length of the run beginning at lo, in the slice [lo, hi). lo < hi +is required on entry. "A run" is the longest ascending sequence, with + + lo[0] <= lo[1] <= lo[2] <= ... + +or the longest descending sequence, with + + lo[0] > lo[1] > lo[2] > ... + +Boolean *descending is set to 0 in the former case, or to 1 in the latter. +For its intended use in a stable mergesort, the strictness of the defn of +"descending" is needed so that the caller can safely reverse a descending +sequence without violating stability (strict > ensures there are no equal +elements to get out of order). + +Returns -1 in case of error. +*/ +template +int +octave_sort::count_run(T *lo, T *hi, int *descending) +{ + int n; + + *descending = 0; + ++lo; + if (lo == hi) + return 1; + + n = 2; + + IFLT (*lo, *(lo-1)) + { + *descending = 1; + for (lo = lo+1; lo < hi; ++lo, ++n) + { + IFLT (*lo, *(lo-1)) + ; + else + break; + } + } + else + { + for (lo = lo+1; lo < hi; ++lo, ++n) + { + IFLT (*lo, *(lo-1)) + break; + } + } + + return n; +} + +/* +Locate the proper position of key in a sorted vector; if the vector contains +an element equal to key, return the position immediately to the left of +the leftmost equal element. [gallop_right() does the same except returns +the position to the right of the rightmost equal element (if any).] + +"a" is a sorted vector with n elements, starting at a[0]. n must be > 0. + +"hint" is an index at which to begin the search, 0 <= hint < n. The closer +hint is to the final result, the faster this runs. + +The return value is the int k in 0..n such that + + a[k-1] < key <= a[k] + +pretending that *(a-1) is minus infinity and a[n] is plus infinity. IOW, +key belongs at index k; or, IOW, the first k elements of a should precede +key, and the last n-k should follow key. + +Returns -1 on error. See listsort.txt for info on the method. +*/ +template +int +octave_sort::gallop_left(T key, T *a, int n, int hint) +{ + int ofs; + int lastofs; + int k; + + a += hint; + lastofs = 0; + ofs = 1; + IFLT (*a, key) + { + /* a[hint] < key -- gallop right, until + * a[hint + lastofs] < key <= a[hint + ofs] + */ + const int maxofs = n - hint; /* &a[n-1] is highest */ + while (ofs < maxofs) + { + IFLT (a[ofs], key) + { + lastofs = ofs; + ofs = (ofs << 1) + 1; + if (ofs <= 0) /* int overflow */ + ofs = maxofs; + } + else /* key <= a[hint + ofs] */ + break; + } + if (ofs > maxofs) + ofs = maxofs; + /* Translate back to offsets relative to &a[0]. */ + lastofs += hint; + ofs += hint; + } + else + { + /* key <= a[hint] -- gallop left, until + * a[hint - ofs] < key <= a[hint - lastofs] + */ + const int maxofs = hint + 1; /* &a[0] is lowest */ + while (ofs < maxofs) + { + IFLT (*(a-ofs), key) + break; + /* key <= a[hint - ofs] */ + lastofs = ofs; + ofs = (ofs << 1) + 1; + if (ofs <= 0) /* int overflow */ + ofs = maxofs; + } + if (ofs > maxofs) + ofs = maxofs; + /* Translate back to positive offsets relative to &a[0]. */ + k = lastofs; + lastofs = hint - ofs; + ofs = hint - k; + } + a -= hint; + + /* Now a[lastofs] < key <= a[ofs], so key belongs somewhere to the + * right of lastofs but no farther right than ofs. Do a binary + * search, with invariant a[lastofs-1] < key <= a[ofs]. + */ + ++lastofs; + while (lastofs < ofs) + { + int m = lastofs + ((ofs - lastofs) >> 1); + + IFLT (a[m], key) + lastofs = m+1; /* a[m] < key */ + else + ofs = m; /* key <= a[m] */ + } + return ofs; +} + +/* +Exactly like gallop_left(), except that if key already exists in a[0:n], +finds the position immediately to the right of the rightmost equal value. + +The return value is the int k in 0..n such that + + a[k-1] <= key < a[k] + +or -1 if error. + +The code duplication is massive, but this is enough different given that +we're sticking to "<" comparisons that it's much harder to follow if +written as one routine with yet another "left or right?" flag. +*/ +template +int +octave_sort::gallop_right(T key, T *a, int n, int hint) +{ + int ofs; + int lastofs; + int k; + + a += hint; + lastofs = 0; + ofs = 1; + IFLT (key, *a) + { + /* key < a[hint] -- gallop left, until + * a[hint - ofs] <= key < a[hint - lastofs] + */ + const int maxofs = hint + 1; /* &a[0] is lowest */ + while (ofs < maxofs) + { + IFLT (key, *(a-ofs)) + { + lastofs = ofs; + ofs = (ofs << 1) + 1; + if (ofs <= 0) /* int overflow */ + ofs = maxofs; + } + else /* a[hint - ofs] <= key */ + break; + } + if (ofs > maxofs) + ofs = maxofs; + /* Translate back to positive offsets relative to &a[0]. */ + k = lastofs; + lastofs = hint - ofs; + ofs = hint - k; + } + else + { + /* a[hint] <= key -- gallop right, until + * a[hint + lastofs] <= key < a[hint + ofs] + */ + const int maxofs = n - hint; /* &a[n-1] is highest */ + while (ofs < maxofs) + { + IFLT (key, a[ofs]) + break; + /* a[hint + ofs] <= key */ + lastofs = ofs; + ofs = (ofs << 1) + 1; + if (ofs <= 0) /* int overflow */ + ofs = maxofs; + } + if (ofs > maxofs) + ofs = maxofs; + /* Translate back to offsets relative to &a[0]. */ + lastofs += hint; + ofs += hint; + } + a -= hint; + + /* Now a[lastofs] <= key < a[ofs], so key belongs somewhere to the + * right of lastofs but no farther right than ofs. Do a binary + * search, with invariant a[lastofs-1] <= key < a[ofs]. + */ + ++lastofs; + while (lastofs < ofs) + { + int m = lastofs + ((ofs - lastofs) >> 1); + + IFLT (key, a[m]) + ofs = m; /* key < a[m] */ + else + lastofs = m+1; /* a[m] <= key */ + } + return ofs; +} + +/* Conceptually a MergeState's constructor. */ +template +void +octave_sort::merge_init(void) +{ + ms.a = NULL; + ms.alloced = 0; + ms.n = 0; + ms.min_gallop = MIN_GALLOP; +} + +/* Free all the temp memory owned by the MergeState. This must be called + * when you're done with a MergeState, and may be called before then if + * you want to free the temp memory early. + */ +template +void +octave_sort::merge_freemem(void) +{ + if (ms.a) + free (ms.a); + ms.alloced = 0; + ms.a = NULL; +} + +static inline int +roundupsize(int n) +{ + unsigned int nbits = 3; + unsigned int n2 = (unsigned int)n >> 8; + + /* Round up: + * If n < 256, to a multiple of 8. + * If n < 2048, to a multiple of 64. + * If n < 16384, to a multiple of 512. + * If n < 131072, to a multiple of 4096. + * If n < 1048576, to a multiple of 32768. + * If n < 8388608, to a multiple of 262144. + * If n < 67108864, to a multiple of 2097152. + * If n < 536870912, to a multiple of 16777216. + * ... + * If n < 2**(5+3*i), to a multiple of 2**(3*i). + * + * This over-allocates proportional to the list size, making room + * for additional growth. The over-allocation is mild, but is + * enough to give linear-time amortized behavior over a long + * sequence of appends() in the presence of a poorly-performing + * system realloc() (which is a reality, e.g., across all flavors + * of Windows, with Win9x behavior being particularly bad -- and + * we've still got address space fragmentation problems on Win9x + * even with this scheme, although it requires much longer lists to + * provoke them than it used to). + */ + while (n2) { + n2 >>= 3; + nbits += 3; + } + return ((n >> nbits) + 1) << nbits; +} + +/* Ensure enough temp memory for 'need' array slots is available. + * Returns 0 on success and -1 if the memory can't be gotten. + */ +template +int +octave_sort::merge_getmem(int need) +{ + if (need <= ms.alloced) + return 0; + + need = roundupsize(need); + /* Don't realloc! That can cost cycles to copy the old data, but + * we don't care what's in the block. + */ + merge_freemem( ); + ms.a = (T *) malloc (need * sizeof (T)); + if (ms.a) { + ms.alloced = need; + return 0; + } + merge_freemem( ); /* reset to sane state */ + return -1; +} + +#define MERGE_GETMEM(NEED) ((NEED) <= ms.alloced ? 0 : \ + merge_getmem(NEED)) + +/* Merge the na elements starting at pa with the nb elements starting at pb + * in a stable way, in-place. na and nb must be > 0, and pa + na == pb. + * Must also have that *pb < *pa, that pa[na-1] belongs at the end of the + * merge, and should have na <= nb. See listsort.txt for more info. + * Return 0 if successful, -1 if error. + */ +template +int +octave_sort::merge_lo(T *pa, int na, T *pb, int nb) +{ + int k; + T *dest; + int result = -1; /* guilty until proved innocent */ + int min_gallop = ms.min_gallop; + + if (MERGE_GETMEM(na) < 0) + return -1; + memcpy(ms.a, pa, na * sizeof(T)); + dest = pa; + pa = ms.a; + + *dest++ = *pb++; + --nb; + if (nb == 0) + goto Succeed; + if (na == 1) + goto CopyB; + + for (;;) { + int acount = 0; /* # of times A won in a row */ + int bcount = 0; /* # of times B won in a row */ + + /* Do the straightforward thing until (if ever) one run + * appears to win consistently. + */ + for (;;) { + + IFLT (*pb, *pa) + { + *dest++ = *pb++; + ++bcount; + acount = 0; + --nb; + if (nb == 0) + goto Succeed; + if (bcount >= min_gallop) + break; + } + else + { + *dest++ = *pa++; + ++acount; + bcount = 0; + --na; + if (na == 1) + goto CopyB; + if (acount >= min_gallop) + break; + } + } + + /* One run is winning so consistently that galloping may + * be a huge win. So try that, and continue galloping until + * (if ever) neither run appears to be winning consistently + * anymore. + */ + ++min_gallop; + do + { + min_gallop -= min_gallop > 1; + ms.min_gallop = min_gallop; + k = gallop_right(*pb, pa, na, 0); + acount = k; + if (k) + { + if (k < 0) + goto Fail; + memcpy(dest, pa, k * sizeof(T)); + dest += k; + pa += k; + na -= k; + if (na == 1) + goto CopyB; + /* na==0 is impossible now if the comparison + * function is consistent, but we can't assume + * that it is. + */ + if (na == 0) + goto Succeed; + } + *dest++ = *pb++; + --nb; + if (nb == 0) + goto Succeed; + + k = gallop_left(*pa, pb, nb, 0); + bcount = k; + if (k) { + if (k < 0) + goto Fail; + memmove(dest, pb, k * sizeof(T)); + dest += k; + pb += k; + nb -= k; + if (nb == 0) + goto Succeed; + } + *dest++ = *pa++; + --na; + if (na == 1) + goto CopyB; + } while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP); + ++min_gallop; /* penalize it for leaving galloping mode */ + ms.min_gallop = min_gallop; + } + Succeed: + result = 0; + Fail: + if (na) + memcpy(dest, pa, na * sizeof(T)); + return result; + CopyB: + /* The last element of pa belongs at the end of the merge. */ + memmove(dest, pb, nb * sizeof(T)); + dest[nb] = *pa; + return 0; +} + +/* Merge the na elements starting at pa with the nb elements starting at pb + * in a stable way, in-place. na and nb must be > 0, and pa + na == pb. + * Must also have that *pb < *pa, that pa[na-1] belongs at the end of the + * merge, and should have na >= nb. See listsort.txt for more info. + * Return 0 if successful, -1 if error. + */ +template +int +octave_sort::merge_hi(T *pa, int na, T *pb, int nb) +{ + int k; + T *dest; + int result = -1; /* guilty until proved innocent */ + T *basea; + T *baseb; + int min_gallop = ms.min_gallop; + + if (MERGE_GETMEM(nb) < 0) + return -1; + dest = pb + nb - 1; + memcpy(ms.a, pb, nb * sizeof(T)); + basea = pa; + baseb = ms.a; + pb = ms.a + nb - 1; + pa += na - 1; + + *dest-- = *pa--; + --na; + if (na == 0) + goto Succeed; + if (nb == 1) + goto CopyA; + + for (;;) + { + int acount = 0; /* # of times A won in a row */ + int bcount = 0; /* # of times B won in a row */ + + /* Do the straightforward thing until (if ever) one run + * appears to win consistently. + */ + for (;;) + { + IFLT (*pb, *pa) + { + *dest-- = *pa--; + ++acount; + bcount = 0; + --na; + if (na == 0) + goto Succeed; + if (acount >= min_gallop) + break; + } + else + { + *dest-- = *pb--; + ++bcount; + acount = 0; + --nb; + if (nb == 1) + goto CopyA; + if (bcount >= min_gallop) + break; + } + } + + /* One run is winning so consistently that galloping may + * be a huge win. So try that, and continue galloping until + * (if ever) neither run appears to be winning consistently + * anymore. + */ + ++min_gallop; + do + { + min_gallop -= min_gallop > 1; + ms.min_gallop = min_gallop; + k = gallop_right(*pb, basea, na, na-1); + if (k < 0) + goto Fail; + k = na - k; + acount = k; + if (k) + { + dest -= k; + pa -= k; + memmove(dest+1, pa+1, k * sizeof(T )); + na -= k; + if (na == 0) + goto Succeed; + } + *dest-- = *pb--; + --nb; + if (nb == 1) + goto CopyA; + + k = gallop_left(*pa, baseb, nb, nb-1); + if (k < 0) + goto Fail; + k = nb - k; + bcount = k; + if (k) + { + dest -= k; + pb -= k; + memcpy(dest+1, pb+1, k * sizeof(T)); + nb -= k; + if (nb == 1) + goto CopyA; + /* nb==0 is impossible now if the comparison + * function is consistent, but we can't assume + * that it is. + */ + if (nb == 0) + goto Succeed; + } + *dest-- = *pa--; + --na; + if (na == 0) + goto Succeed; + } while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP); + ++min_gallop; /* penalize it for leaving galloping mode */ + ms.min_gallop = min_gallop; + } +Succeed: + result = 0; +Fail: + if (nb) + memcpy(dest-(nb-1), baseb, nb * sizeof(T)); + return result; +CopyA: + /* The first element of pb belongs at the front of the merge. */ + dest -= na; + pa -= na; + memmove(dest+1, pa+1, na * sizeof(T)); + *dest = *pb; + return 0; +} + +/* Merge the two runs at stack indices i and i+1. + * Returns 0 on success, -1 on error. + */ +template +int +octave_sort::merge_at(int i) +{ + T *pa, *pb; + int na, nb; + int k; + + pa = ms.pending[i].base; + na = ms.pending[i].len; + pb = ms.pending[i+1].base; + nb = ms.pending[i+1].len; + + /* Record the length of the combined runs; if i is the 3rd-last + * run now, also slide over the last run (which isn't involved + * in this merge). The current run i+1 goes away in any case. + */ + ms.pending[i].len = na + nb; + if (i == ms.n - 3) + ms.pending[i+1] = ms.pending[i+2]; + --ms.n; + + /* Where does b start in a? Elements in a before that can be + * ignored (already in place). + */ + k = gallop_right(*pb, pa, na, 0); + if (k < 0) + return -1; + pa += k; + na -= k; + if (na == 0) + return 0; + + /* Where does a end in b? Elements in b after that can be + * ignored (already in place). + */ + nb = gallop_left(pa[na-1], pb, nb, nb-1); + if (nb <= 0) + return nb; + + /* Merge what remains of the runs, using a temp array with + * min(na, nb) elements. + */ + if (na <= nb) + return merge_lo(pa, na, pb, nb); + else + return merge_hi(pa, na, pb, nb); +} + +/* Examine the stack of runs waiting to be merged, merging adjacent runs + * until the stack invariants are re-established: + * + * 1. len[-3] > len[-2] + len[-1] + * 2. len[-2] > len[-1] + * + * See listsort.txt for more info. + * + * Returns 0 on success, -1 on error. + */ +template +int +octave_sort::merge_collapse(void) +{ + struct s_slice *p = ms.pending; + + while (ms.n > 1) + { + int n = ms.n - 2; + if (n > 0 && p[n-1].len <= p[n].len + p[n+1].len) + { + if (p[n-1].len < p[n+1].len) + --n; + if (merge_at(n) < 0) + return -1; + } + else if (p[n].len <= p[n+1].len) + { + if (merge_at(n) < 0) + return -1; + } + else + break; + } + return 0; +} + +/* Regardless of invariants, merge all runs on the stack until only one + * remains. This is used at the end of the mergesort. + * + * Returns 0 on success, -1 on error. + */ +template +int +octave_sort::merge_force_collapse(void) +{ + struct s_slice *p = ms.pending; + + while (ms.n > 1) + { + int n = ms.n - 2; + if (n > 0 && p[n-1].len < p[n+1].len) + --n; + if (merge_at(n) < 0) + return -1; + } + return 0; +} + +/* Compute a good value for the minimum run length; natural runs shorter + * than this are boosted artificially via binary insertion. + * + * If n < 64, return n (it's too small to bother with fancy stuff). + * Else if n is an exact power of 2, return 32. + * Else return an int k, 32 <= k <= 64, such that n/k is close to, but + * strictly less than, an exact power of 2. + * + * See listsort.txt for more info. + */ +template +int +octave_sort::merge_compute_minrun(int n) +{ + int r = 0; /* becomes 1 if any 1 bits are shifted off */ + + while (n >= 64) { + r |= n & 1; + n >>= 1; + } + return n + r; +} + +template +void +octave_sort::sort (T *v, int elements) +{ + /* Re-initialize the Mergestate as this might be the second time called */ + ms.n = 0; + ms.min_gallop = MIN_GALLOP; + + if (elements > 1) + { + int nremaining = elements; + T *lo = v; + T *hi = v + elements; + + /* March over the array once, left to right, finding natural runs, + * and extending short natural runs to minrun elements. + */ + int minrun = merge_compute_minrun(nremaining); + do + { + int descending; + int n; + + /* Identify next run. */ + n = count_run(lo, hi, &descending); + if (n < 0) + goto fail; + if (descending) + reverse_slice(lo, lo + n); + /* If short, extend to min(minrun, nremaining). */ + if (n < minrun) + { + const int force = nremaining <= minrun ? nremaining : minrun; + binarysort(lo, lo + force, lo + n); + n = force; + } + /* Push run onto pending-runs stack, and maybe merge. */ + assert(ms.n < MAX_MERGE_PENDING); + ms.pending[ms.n].base = lo; + ms.pending[ms.n].len = n; + ++ms.n; + if (merge_collapse( ) < 0) + goto fail; + /* Advance to find next run. */ + lo += n; + nremaining -= n; + } while (nremaining); + + merge_force_collapse( ); + } + +fail: + return; +} + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff --git a/liboctave/oct-sort.h b/liboctave/oct-sort.h new file mode 100644 --- /dev/null +++ b/liboctave/oct-sort.h @@ -0,0 +1,203 @@ +/* +Copyright (C) 2003 David Bateman + +This file is part of Octave. + +Octave is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +Octave is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with Octave; see the file COPYING. If not, write to the Free +Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +Code stolen in large part from Python's, listobject.c, which itself had +no license header. However, thanks to Tim Peters for the parts of the +code I ripped-off. + +As required in the Python license the short description of the changes +made are + +* convert the sorting code in listobject.cc into a generic class, + replacing PyObject* with the type of the class T. + +The Python license is + + PSF LICENSE AGREEMENT FOR PYTHON 2.3 + -------------------------------------- + + 1. This LICENSE AGREEMENT is between the Python Software Foundation + ("PSF"), and the Individual or Organization ("Licensee") accessing and + otherwise using Python 2.3 software in source or binary form and its + associated documentation. + + 2. Subject to the terms and conditions of this License Agreement, PSF + hereby grants Licensee a nonexclusive, royalty-free, world-wide + license to reproduce, analyze, test, perform and/or display publicly, + prepare derivative works, distribute, and otherwise use Python 2.3 + alone or in any derivative version, provided, however, that PSF's + License Agreement and PSF's notice of copyright, i.e., "Copyright (c) + 2001, 2002, 2003 Python Software Foundation; All Rights Reserved" are + retained in Python 2.3 alone or in any derivative version prepared by + Licensee. + + 3. In the event Licensee prepares a derivative work that is based on + or incorporates Python 2.3 or any part thereof, and wants to make + the derivative work available to others as provided herein, then + Licensee hereby agrees to include in any such work a brief summary of + the changes made to Python 2.3. + + 4. PSF is making Python 2.3 available to Licensee on an "AS IS" + basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR + IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND + DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS + FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 2.3 WILL NOT + INFRINGE ANY THIRD PARTY RIGHTS. + + 5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON + 2.3 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS + A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 2.3, + OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. + + 6. This License Agreement will automatically terminate upon a material + breach of its terms and conditions. + + 7. Nothing in this License Agreement shall be deemed to create any + relationship of agency, partnership, or joint venture between PSF and + Licensee. This License Agreement does not grant permission to use PSF + trademarks or trade name in a trademark sense to endorse or promote + products or services of Licensee, or any third party. + + 8. By copying, installing or otherwise using Python 2.3, Licensee + agrees to be bound by the terms and conditions of this License + Agreement. +*/ + +#if !defined (octave_sort_h) +#define octave_sort_h 1 + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include "oct-obj.h" +#include "lo-mappers.h" +#include "quit.h" + +/* The maximum number of entries in a MergeState's pending-runs stack. + * This is enough to sort arrays of size up to about + * 32 * phi ** MAX_MERGE_PENDING + * where phi ~= 1.618. 85 is ridiculously large enough, good for an array + * with 2**64 elements. + */ +#define MAX_MERGE_PENDING 85 + +/* When we get into galloping mode, we stay there until both runs win less + * often than MIN_GALLOP consecutive times. See listsort.txt for more info. + */ +#define MIN_GALLOP 7 + +/* Avoid malloc for small temp arrays. */ +#define MERGESTATE_TEMP_SIZE 1024 + +template +class +octave_sort +{ + public: + octave_sort (void); + + octave_sort (bool (*comp) (T, T)); + + ~octave_sort (void) { merge_freemem ( ); } + + void sort (T *v, int elements); + + private: + /* One MergeState exists on the stack per invocation of mergesort. It's just + * a convenient way to pass state around among the helper functions. + * + * DGB: This isn't needed with mergesort in a class, but it doesn't slow + * things up, and it is likely to make my life easier for any potential + * backporting of changes in the Python code. + */ + + struct s_slice + { + T *base; + int len; + }; + + typedef struct s_MergeState + { + /* This controls when we get *into* galloping mode. It's initialized + * to MIN_GALLOP. merge_lo and merge_hi tend to nudge it higher for + * random data, and lower for highly structured data. + */ + int min_gallop; + + /* 'a' is temp storage to help with merges. It contains room for + * alloced entries. + */ + T *a; /* may point to temparray below */ + int alloced; + + /* A stack of n pending runs yet to be merged. Run #i starts at + * address base[i] and extends for len[i] elements. It's always + * true (so long as the indices are in bounds) that + * + * pending[i].base + pending[i].len == pending[i+1].base + * + * so we could cut the storage for this, but it's a minor amount, + * and keeping all the info explicit simplifies the code. + */ + int n; + struct s_slice pending[MAX_MERGE_PENDING]; + } MergeState; + + bool (*compare)(T, T); + + MergeState ms; + + void reverse_slice (T *lo, T *hi); + + void binarysort (T *lo, T *hi, T *start); + + int count_run(T *lo, T *hi, int *descending); + + int gallop_left(T key, T *a, int n, int hint); + + int gallop_right(T key, T *a, int n, int hint); + + void merge_init(void); + + void merge_freemem(void); + + int merge_getmem(int need); + + int merge_lo(T *pa, int na, T *pb, int nb); + + int merge_hi(T *pa, int na, T *pb, int nb); + + int merge_at(int i); + + int merge_collapse(void); + + int merge_force_collapse(void); + + int merge_compute_minrun(int n); +}; + +#endif + +/* +;;; Local Variables: *** +;;; mode: C++ *** +;;; End: *** +*/ diff --git a/src/DLD-FUNCTIONS/sort.cc b/src/DLD-FUNCTIONS/sort.cc --- a/src/DLD-FUNCTIONS/sort.cc +++ b/src/DLD-FUNCTIONS/sort.cc @@ -117,10 +117,10 @@ return retval; dim_vector dv = m.dims (); - int ns = dv (dim); - int iter = dv.numel () / ns; - int stride = 1; - for (int i = 0; i < dim; i++) + unsigned int ns = dv (dim); + unsigned int iter = dv.numel () / ns; + unsigned int stride = 1; + for (unsigned int i = 0; i < (unsigned int)dim; i++) stride *= dv(i); #if defined(HAVE_IEEE754_COMPLIANCE) && defined(EIGHT_BYTE_INT) @@ -135,15 +135,15 @@ OCTAVE_LOCAL_BUFFER (vec_index *, vi, ns); OCTAVE_LOCAL_BUFFER (vec_index, vix, ns); - for (int i = 0; i < ns; i++) + for (unsigned int i = 0; i < ns; i++) vi[i] = &vix[i]; NDArray idx (dv); - for (int j = 0; j < iter; j++) + for (unsigned int j = 0; j < iter; j++) { - int offset = j; - int offset2 = 0; + unsigned int offset = j; + unsigned int offset2 = 0; while (offset >= stride) { offset -= stride; @@ -154,7 +154,7 @@ /* Flip the data in the vector so that int compares on * IEEE754 give the correct ordering */ - for (int i = 0; i < ns; i++) + for (unsigned int i = 0; i < ns; i++) { vi[i]->vec = FloatFlip (p[i*stride + offset]); vi[i]->indx = i + 1; @@ -165,7 +165,7 @@ /* Flip the data out of the vector so that int compares on * IEEE754 give the correct ordering */ - for (int i = 0; i < ns; i++) + for (unsigned int i = 0; i < ns; i++) { p[i*stride + offset] = IFloatFlip (vi[i]->vec); idx(i*stride + offset) = vi[i]->indx; @@ -200,16 +200,15 @@ else { octave_sort ieee754_sort; - OCTAVE_LOCAL_BUFFER (unsigned EIGHT_BYTE_INT, vi, ns); if (stride == 1) { - for (int j = 0; j < iter; j++) + for (unsigned int j = 0; j < iter; j++) { /* Flip the data in the vector so that int compares on * IEEE754 give the correct ordering */ - for (int i = 0; i < ns; i++) + for (unsigned int i = 0; i < ns; i++) p[i] = FloatFlip (p[i]); ieee754_sort.sort (p, ns); @@ -217,7 +216,7 @@ /* Flip the data out of the vector so that int compares on * IEEE754 give the correct ordering */ - for (int i = 0; i < ns; i++) + for (unsigned int i = 0; i < ns; i++) p[i] = IFloatFlip (p[i]); /* There are two representations of NaN. One will be sorted to @@ -241,10 +240,12 @@ } else { - for (int j = 0; j < iter; j++) + OCTAVE_LOCAL_BUFFER (unsigned EIGHT_BYTE_INT, vi, ns); + + for (unsigned int j = 0; j < iter; j++) { - int offset = j; - int offset2 = 0; + unsigned int offset = j; + unsigned int offset2 = 0; while (offset >= stride) { offset -= stride; @@ -255,7 +256,7 @@ /* Flip the data in the vector so that int compares on * IEEE754 give the correct ordering */ - for (int i = 0; i < ns; i++) + for (unsigned int i = 0; i < ns; i++) vi[i] = FloatFlip (p[i*stride + offset]); ieee754_sort.sort (vi, ns); @@ -263,7 +264,7 @@ /* Flip the data out of the vector so that int compares on * IEEE754 give the correct ordering */ - for (int i = 0; i < ns; i++) + for (unsigned int i = 0; i < ns; i++) p[i*stride + offset] = IFloatFlip (vi[i]); /* There are two representations of NaN. One will be sorted to @@ -291,18 +292,18 @@ OCTAVE_LOCAL_BUFFER (vec_index *, vi, ns); OCTAVE_LOCAL_BUFFER (vec_index, vix, ns); - for (int i = 0; i < ns; i++) + for (unsigned int i = 0; i < ns; i++) vi[i] = &vix[i]; NDArray idx (dv); if (stride == 1) { - for (int j = 0; j < iter; j++) + for (unsigned int j = 0; j < iter; j++) { - int offset = j * ns; + unsigned int offset = j * ns; - for (int i = 0; i < ns; i++) + for (unsigned int i = 0; i < ns; i++) { vi[i]->vec = v[i]; vi[i]->indx = i + 1; @@ -310,7 +311,7 @@ indexed_double_sort.sort (vi, ns); - for (int i = 0; i < ns; i++) + for (unsigned int i = 0; i < ns; i++) { v[i] = vi[i]->vec; idx(i + offset) = vi[i]->indx; @@ -320,10 +321,10 @@ } else { - for (int j = 0; j < iter; j++) + for (unsigned int j = 0; j < iter; j++) { - int offset = j; - int offset2 = 0; + unsigned int offset = j; + unsigned int offset2 = 0; while (offset >= stride) { offset -= stride; @@ -331,7 +332,7 @@ } offset += offset2 * stride * ns; - for (int i = 0; i < ns; i++) + for (unsigned int i = 0; i < ns; i++) { vi[i]->vec = v[i*stride + offset]; vi[i]->indx = i + 1; @@ -339,7 +340,7 @@ indexed_double_sort.sort (vi, ns); - for (int i = 0; i < ns; i++) + for (unsigned int i = 0; i < ns; i++) { v[i*stride+offset] = vi[i]->vec; idx(i*stride+offset) = vi[i]->indx; @@ -354,7 +355,7 @@ octave_sort double_sort (double_compare); if (stride == 1) - for (int j = 0; j < iter; j++) + for (unsigned int j = 0; j < iter; j++) { double_sort.sort (v, ns); v += ns; @@ -362,10 +363,10 @@ else { OCTAVE_LOCAL_BUFFER (double, vi, ns); - for (int j = 0; j < iter; j++) + for (unsigned int j = 0; j < iter; j++) { - int offset = j; - int offset2 = 0; + unsigned int offset = j; + unsigned int offset2 = 0; while (offset >= stride) { offset -= stride; @@ -373,12 +374,12 @@ } offset += offset2 * stride * ns; - for (int i = 0; i < ns; i++) + for (unsigned int i = 0; i < ns; i++) vi[i] = v[i*stride + offset]; double_sort.sort (vi, ns); - for (int i = 0; i < ns; i++) + for (unsigned int i = 0; i < ns; i++) v[i*stride + offset] = vi[i]; } } @@ -397,10 +398,10 @@ return retval; dim_vector dv = m.dims (); - int ns = dv (dim); - int iter = dv.numel () / ns; - int stride = 1; - for (int i = 0; i < dim; i++) + unsigned int ns = dv (dim); + unsigned int iter = dv.numel () / ns; + unsigned int stride = 1; + for (unsigned int i = 0; i < (unsigned int)dim; i++) stride *= dv(i); octave_sort indexed_double_sort (complex_compare); @@ -410,18 +411,18 @@ OCTAVE_LOCAL_BUFFER (complex_vec_index *, vi, ns); OCTAVE_LOCAL_BUFFER (complex_vec_index, vix, ns); - for (int i = 0; i < ns; i++) + for (unsigned int i = 0; i < ns; i++) vi[i] = &vix[i]; NDArray idx (dv); if (stride == 1) { - for (int j = 0; j < iter; j++) + for (unsigned int j = 0; j < iter; j++) { - int offset = j * ns; + unsigned int offset = j * ns; - for (int i = 0; i < ns; i++) + for (unsigned int i = 0; i < ns; i++) { vi[i]->vec = v[i]; vi[i]->indx = i + 1; @@ -431,7 +432,7 @@ if (return_idx) { - for (int i = 0; i < ns; i++) + for (unsigned int i = 0; i < ns; i++) { v[i] = vi[i]->vec; idx(i + offset) = vi[i]->indx; @@ -439,7 +440,7 @@ } else { - for (int i = 0; i < ns; i++) + for (unsigned int i = 0; i < ns; i++) v[i] = vi[i]->vec; } v += ns; @@ -447,10 +448,10 @@ } else { - for (int j = 0; j < iter; j++) + for (unsigned int j = 0; j < iter; j++) { - int offset = j; - int offset2 = 0; + unsigned int offset = j; + unsigned int offset2 = 0; while (offset >= stride) { offset -= stride; @@ -458,7 +459,7 @@ } offset += offset2 * stride * ns; - for (int i = 0; i < ns; i++) + for (unsigned int i = 0; i < ns; i++) { vi[i]->vec = v[i*stride + offset]; vi[i]->indx = i + 1; @@ -468,7 +469,7 @@ if (return_idx) { - for (int i = 0; i < ns; i++) + for (unsigned int i = 0; i < ns; i++) { v[i*stride + offset] = vi[i]->vec; idx(i*stride + offset) = vi[i]->indx; @@ -476,7 +477,7 @@ } else { - for (int i = 0; i < ns; i++) + for (unsigned int i = 0; i < ns; i++) v[i*stride + offset] = vi[i]->vec; } }