Mercurial > hg > octave-lyh
annotate liboctave/Array-d.cc @ 8651:ea8e65ca234f
reduce memory usage in sorting
author | Jaroslav Hajek <highegg@gmail.com> |
---|---|
date | Tue, 03 Feb 2009 08:16:39 +0100 |
parents | 25bc2d31e1bf |
children | 314be237cd5b |
rev | line source |
---|---|
757 | 1 /* |
2 | |
7017 | 3 Copyright (C) 1994, 1995, 1996, 1997, 1998, 2000, 2001, 2003, 2004, |
4 2005, 2006, 2007 John W. Eaton | |
757 | 5 |
6 This file is part of Octave. | |
7 | |
8 Octave is free software; you can redistribute it and/or modify it | |
9 under the terms of the GNU General Public License as published by the | |
7016 | 10 Free Software Foundation; either version 3 of the License, or (at your |
11 option) any later version. | |
757 | 12 |
13 Octave is distributed in the hope that it will be useful, but WITHOUT | |
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
16 for more details. | |
17 | |
18 You should have received a copy of the GNU General Public License | |
7016 | 19 along with Octave; see the file COPYING. If not, see |
20 <http://www.gnu.org/licenses/>. | |
757 | 21 |
22 */ | |
23 | |
2006 | 24 #ifdef HAVE_CONFIG_H |
25 #include <config.h> | |
26 #endif | |
27 | |
757 | 28 // Instantiate Arrays of double values. |
29 | |
30 #include "Array.h" | |
31 #include "Array.cc" | |
7433 | 32 #include "oct-sort.cc" |
8377
25bc2d31e1bf
improve OCTAVE_LOCAL_BUFFER
Jaroslav Hajek <highegg@gmail.com>
parents:
8290
diff
changeset
|
33 #include "oct-locbuf.h" |
7433 | 34 |
35 #if defined (HAVE_IEEE754_DATA_FORMAT) | |
36 | |
37 static inline uint64_t | |
38 FloatFlip (uint64_t f) | |
39 { | |
40 uint64_t mask | |
41 = -static_cast<int64_t>(f >> 63) | 0x8000000000000000ULL; | |
42 | |
43 return f ^ mask; | |
44 } | |
45 | |
46 static inline uint64_t | |
47 IFloatFlip (uint64_t f) | |
48 { | |
49 uint64_t mask = ((f >> 63) - 1) | 0x8000000000000000ULL; | |
50 | |
51 return f ^ mask; | |
52 } | |
53 | |
54 template <> | |
55 bool | |
56 ascending_compare (double a, double b) | |
57 { | |
58 return (xisnan (b) || (a < b)); | |
59 } | |
60 | |
61 template <> | |
62 bool | |
8651
ea8e65ca234f
reduce memory usage in sorting
Jaroslav Hajek <highegg@gmail.com>
parents:
8377
diff
changeset
|
63 ascending_compare (const double *a, const double *b) |
7433 | 64 { |
8651
ea8e65ca234f
reduce memory usage in sorting
Jaroslav Hajek <highegg@gmail.com>
parents:
8377
diff
changeset
|
65 return (xisnan (*b) || (*a < *b)); |
7433 | 66 } |
67 | |
68 template <> | |
69 bool | |
70 descending_compare (double a, double b) | |
71 { | |
72 return (xisnan (a) || (a > b)); | |
73 } | |
74 | |
75 template <> | |
76 bool | |
8651
ea8e65ca234f
reduce memory usage in sorting
Jaroslav Hajek <highegg@gmail.com>
parents:
8377
diff
changeset
|
77 descending_compare (const double *a, const double *b) |
7433 | 78 { |
8651
ea8e65ca234f
reduce memory usage in sorting
Jaroslav Hajek <highegg@gmail.com>
parents:
8377
diff
changeset
|
79 return (xisnan (*b) || (*a > *b)); |
7433 | 80 } |
81 | |
82 INSTANTIATE_ARRAY_SORT (uint64_t); | |
83 | |
84 template <> | |
85 Array<double> | |
86 Array<double>::sort (octave_idx_type dim, sortmode mode) const | |
87 { | |
8651
ea8e65ca234f
reduce memory usage in sorting
Jaroslav Hajek <highegg@gmail.com>
parents:
8377
diff
changeset
|
88 Array<double> m (dims ()); |
7433 | 89 |
90 dim_vector dv = m.dims (); | |
91 | |
92 if (m.length () < 1) | |
93 return m; | |
94 | |
95 octave_idx_type ns = dv(dim); | |
96 octave_idx_type iter = dv.numel () / ns; | |
97 octave_idx_type stride = 1; | |
98 for (int i = 0; i < dim; i++) | |
99 stride *= dv(i); | |
100 | |
101 double *v = m.fortran_vec (); | |
8651
ea8e65ca234f
reduce memory usage in sorting
Jaroslav Hajek <highegg@gmail.com>
parents:
8377
diff
changeset
|
102 const double *ov = data (); |
7433 | 103 |
104 uint64_t *p = reinterpret_cast<uint64_t *> (v); | |
8651
ea8e65ca234f
reduce memory usage in sorting
Jaroslav Hajek <highegg@gmail.com>
parents:
8377
diff
changeset
|
105 const uint64_t *op = reinterpret_cast<const uint64_t *> (ov); |
7433 | 106 |
107 octave_sort<uint64_t> lsort; | |
108 | |
109 if (mode == ASCENDING) | |
110 lsort.set_compare (ascending_compare); | |
111 else if (mode == DESCENDING) | |
112 lsort.set_compare (descending_compare); | |
7463
2467639bd8c0
eliminate UNDEFINED sort mode
John W. Eaton <jwe@octave.org>
parents:
7443
diff
changeset
|
113 else |
2467639bd8c0
eliminate UNDEFINED sort mode
John W. Eaton <jwe@octave.org>
parents:
7443
diff
changeset
|
114 abort (); |
7433 | 115 |
116 if (stride == 1) | |
117 { | |
118 for (octave_idx_type j = 0; j < iter; j++) | |
119 { | |
120 // Flip the data in the vector so that int compares on | |
121 // IEEE754 give the correct ordering. | |
122 | |
123 for (octave_idx_type i = 0; i < ns; i++) | |
8651
ea8e65ca234f
reduce memory usage in sorting
Jaroslav Hajek <highegg@gmail.com>
parents:
8377
diff
changeset
|
124 p[i] = FloatFlip (op[i]); |
7433 | 125 |
126 lsort.sort (p, ns); | |
127 | |
128 // Flip the data out of the vector so that int compares | |
129 // on IEEE754 give the correct ordering. | |
130 | |
131 for (octave_idx_type i = 0; i < ns; i++) | |
132 p[i] = IFloatFlip (p[i]); | |
133 | |
134 // There are two representations of NaN. One will be | |
135 // sorted to the beginning of the vector and the other | |
136 // to the end. If it will be sorted incorrectly, fix | |
137 // things up. | |
138 | |
139 if (lo_ieee_signbit (octave_NaN)) | |
140 { | |
7463
2467639bd8c0
eliminate UNDEFINED sort mode
John W. Eaton <jwe@octave.org>
parents:
7443
diff
changeset
|
141 if (mode == ASCENDING) |
7433 | 142 { |
143 octave_idx_type i = 0; | |
144 double *vtmp = reinterpret_cast<double *> (p); | |
7924 | 145 while (xisnan (vtmp[i++]) && i < ns) |
146 /* do nothing */; | |
7433 | 147 for (octave_idx_type l = 0; l < ns - i + 1; l++) |
148 vtmp[l] = vtmp[l+i-1]; | |
149 for (octave_idx_type l = ns - i + 1; l < ns; l++) | |
150 vtmp[l] = octave_NaN; | |
151 } | |
152 else | |
153 { | |
154 octave_idx_type i = ns; | |
155 double *vtmp = reinterpret_cast<double *> (p); | |
7924 | 156 while (xisnan (vtmp[--i]) && i > 0) |
157 /* do nothing */; | |
7433 | 158 for (octave_idx_type l = i; l >= 0; l--) |
159 vtmp[l-i+ns-1] = vtmp[l]; | |
160 for (octave_idx_type l = 0; l < ns - i - 1; l++) | |
161 vtmp[l] = octave_NaN; | |
162 } | |
163 } | |
164 | |
165 p += ns; | |
8651
ea8e65ca234f
reduce memory usage in sorting
Jaroslav Hajek <highegg@gmail.com>
parents:
8377
diff
changeset
|
166 op += ns; |
7433 | 167 } |
168 } | |
169 else | |
170 { | |
171 OCTAVE_LOCAL_BUFFER (uint64_t, vi, ns); | |
172 | |
173 for (octave_idx_type j = 0; j < iter; j++) | |
174 { | |
175 octave_idx_type offset = j; | |
176 octave_idx_type offset2 = 0; | |
177 while (offset >= stride) | |
178 { | |
179 offset -= stride; | |
180 offset2++; | |
181 } | |
182 offset += offset2 * stride * ns; | |
183 | |
184 // Flip the data in the vector so that int compares on | |
185 // IEEE754 give the correct ordering. | |
186 | |
187 for (octave_idx_type i = 0; i < ns; i++) | |
8651
ea8e65ca234f
reduce memory usage in sorting
Jaroslav Hajek <highegg@gmail.com>
parents:
8377
diff
changeset
|
188 vi[i] = FloatFlip (op[i*stride + offset]); |
7433 | 189 |
190 lsort.sort (vi, ns); | |
191 | |
192 // Flip the data out of the vector so that int compares | |
193 // on IEEE754 give the correct ordering. | |
194 | |
195 for (octave_idx_type i = 0; i < ns; i++) | |
196 p[i*stride + offset] = IFloatFlip (vi[i]); | |
197 | |
198 // There are two representations of NaN. One will be | |
199 // sorted to the beginning of the vector and the other | |
200 // to the end. If it will be sorted to the beginning, | |
201 // fix things up. | |
202 | |
203 if (lo_ieee_signbit (octave_NaN)) | |
204 { | |
7463
2467639bd8c0
eliminate UNDEFINED sort mode
John W. Eaton <jwe@octave.org>
parents:
7443
diff
changeset
|
205 if (mode == ASCENDING) |
7433 | 206 { |
207 octave_idx_type i = 0; | |
7924 | 208 while (xisnan (v[i++*stride + offset]) && i < ns) |
209 /* do nothing */; | |
7433 | 210 for (octave_idx_type l = 0; l < ns - i + 1; l++) |
211 v[l*stride + offset] = v[(l+i-1)*stride + offset]; | |
212 for (octave_idx_type l = ns - i + 1; l < ns; l++) | |
213 v[l*stride + offset] = octave_NaN; | |
214 } | |
215 else | |
216 { | |
217 octave_idx_type i = ns; | |
7924 | 218 while (xisnan (v[--i*stride + offset]) && i > 0) |
219 /* do nothing */; | |
7433 | 220 for (octave_idx_type l = i; l >= 0; l--) |
221 v[(l-i+ns-1)*stride + offset] = v[l*stride + offset]; | |
222 for (octave_idx_type l = 0; l < ns - i - 1; l++) | |
223 v[l*stride + offset] = octave_NaN; | |
224 } | |
225 } | |
226 } | |
227 } | |
228 | |
229 return m; | |
230 } | |
231 | |
232 template <> | |
233 Array<double> | |
234 Array<double>::sort (Array<octave_idx_type> &sidx, octave_idx_type dim, | |
235 sortmode mode) const | |
236 { | |
8651
ea8e65ca234f
reduce memory usage in sorting
Jaroslav Hajek <highegg@gmail.com>
parents:
8377
diff
changeset
|
237 Array<double> m (dims ()); |
7433 | 238 |
239 dim_vector dv = m.dims (); | |
240 | |
241 if (m.length () < 1) | |
242 { | |
243 sidx = Array<octave_idx_type> (dv); | |
244 return m; | |
245 } | |
246 | |
247 octave_idx_type ns = dv(dim); | |
248 octave_idx_type iter = dv.numel () / ns; | |
249 octave_idx_type stride = 1; | |
250 for (int i = 0; i < dim; i++) | |
251 stride *= dv(i); | |
252 | |
253 double *v = m.fortran_vec (); | |
8651
ea8e65ca234f
reduce memory usage in sorting
Jaroslav Hajek <highegg@gmail.com>
parents:
8377
diff
changeset
|
254 const double *ov = data (); |
7433 | 255 |
256 uint64_t *p = reinterpret_cast<uint64_t *> (v); | |
8651
ea8e65ca234f
reduce memory usage in sorting
Jaroslav Hajek <highegg@gmail.com>
parents:
8377
diff
changeset
|
257 const uint64_t *op = reinterpret_cast<const uint64_t *> (ov); |
7433 | 258 |
8651
ea8e65ca234f
reduce memory usage in sorting
Jaroslav Hajek <highegg@gmail.com>
parents:
8377
diff
changeset
|
259 octave_sort<const uint64_t *> indexed_sort; |
7433 | 260 |
261 if (mode == ASCENDING) | |
262 indexed_sort.set_compare (ascending_compare); | |
263 else if (mode == DESCENDING) | |
264 indexed_sort.set_compare (descending_compare); | |
7463
2467639bd8c0
eliminate UNDEFINED sort mode
John W. Eaton <jwe@octave.org>
parents:
7443
diff
changeset
|
265 else |
2467639bd8c0
eliminate UNDEFINED sort mode
John W. Eaton <jwe@octave.org>
parents:
7443
diff
changeset
|
266 abort (); |
7433 | 267 |
8651
ea8e65ca234f
reduce memory usage in sorting
Jaroslav Hajek <highegg@gmail.com>
parents:
8377
diff
changeset
|
268 OCTAVE_LOCAL_BUFFER (const uint64_t *, vi, ns); |
ea8e65ca234f
reduce memory usage in sorting
Jaroslav Hajek <highegg@gmail.com>
parents:
8377
diff
changeset
|
269 OCTAVE_LOCAL_BUFFER (uint64_t, vix, ns); |
7433 | 270 |
271 sidx = Array<octave_idx_type> (dv); | |
272 | |
273 for (octave_idx_type j = 0; j < iter; j++) | |
274 { | |
275 octave_idx_type offset = j; | |
276 octave_idx_type offset2 = 0; | |
277 while (offset >= stride) | |
278 { | |
279 offset -= stride; | |
280 offset2++; | |
281 } | |
282 offset += offset2 * stride * ns; | |
283 | |
284 // Flip the data in the vector so that int compares on | |
285 // IEEE754 give the correct ordering. | |
286 | |
287 for (octave_idx_type i = 0; i < ns; i++) | |
288 { | |
8651
ea8e65ca234f
reduce memory usage in sorting
Jaroslav Hajek <highegg@gmail.com>
parents:
8377
diff
changeset
|
289 vix[i] = FloatFlip (op[i*stride + offset]); |
ea8e65ca234f
reduce memory usage in sorting
Jaroslav Hajek <highegg@gmail.com>
parents:
8377
diff
changeset
|
290 vi[i] = vix + i; |
7433 | 291 } |
292 | |
293 indexed_sort.sort (vi, ns); | |
294 | |
295 // Flip the data out of the vector so that int compares on | |
296 // IEEE754 give the correct ordering | |
297 | |
298 for (octave_idx_type i = 0; i < ns; i++) | |
299 { | |
8651
ea8e65ca234f
reduce memory usage in sorting
Jaroslav Hajek <highegg@gmail.com>
parents:
8377
diff
changeset
|
300 p[i*stride + offset] = IFloatFlip (*vi[i]); |
ea8e65ca234f
reduce memory usage in sorting
Jaroslav Hajek <highegg@gmail.com>
parents:
8377
diff
changeset
|
301 sidx(i*stride + offset) = vi[i] - vix; |
7433 | 302 } |
303 | |
304 // There are two representations of NaN. One will be sorted | |
305 // to the beginning of the vector and the other to the end. | |
306 // If it will be sorted to the beginning, fix things up. | |
307 | |
308 if (lo_ieee_signbit (octave_NaN)) | |
309 { | |
7463
2467639bd8c0
eliminate UNDEFINED sort mode
John W. Eaton <jwe@octave.org>
parents:
7443
diff
changeset
|
310 if (mode == ASCENDING) |
7433 | 311 { |
312 octave_idx_type i = 0; | |
7924 | 313 while (xisnan (v[i++*stride+offset]) && i < ns) |
314 /* do nothing */; | |
7433 | 315 OCTAVE_LOCAL_BUFFER (double, itmp, i - 1); |
316 for (octave_idx_type l = 0; l < i -1; l++) | |
317 itmp[l] = sidx(l*stride + offset); | |
318 for (octave_idx_type l = 0; l < ns - i + 1; l++) | |
319 { | |
320 v[l*stride + offset] = v[(l+i-1)*stride + offset]; | |
321 sidx(l*stride + offset) = sidx((l+i-1)*stride + offset); | |
322 } | |
323 for (octave_idx_type k = 0, l = ns - i + 1; l < ns; l++, k++) | |
324 { | |
325 v[l*stride + offset] = octave_NaN; | |
326 sidx(l*stride + offset) = | |
327 static_cast<octave_idx_type>(itmp[k]); | |
328 } | |
329 } | |
330 else | |
331 { | |
332 octave_idx_type i = ns; | |
7924 | 333 while (xisnan (v[--i*stride+offset]) && i > 0) |
334 /* do nothing */; | |
7433 | 335 OCTAVE_LOCAL_BUFFER (double, itmp, ns - i - 1); |
336 for (octave_idx_type l = 0; l < ns - i -1; l++) | |
337 itmp[l] = sidx((l+i+1)*stride + offset); | |
338 for (octave_idx_type l = i; l >= 0; l--) | |
339 { | |
340 v[(l-i+ns-1)*stride + offset] = v[l*stride + offset]; | |
341 sidx((l-i+ns-1)*stride + offset) = sidx(l*stride + offset); | |
342 } | |
343 for (octave_idx_type k = 0, l = 0; l < ns - i - 1; l++, k++) | |
344 { | |
345 v[l*stride + offset] = octave_NaN; | |
346 sidx(l*stride + offset) = | |
347 static_cast<octave_idx_type>(itmp[k]); | |
348 } | |
349 } | |
350 } | |
351 } | |
352 | |
353 return m; | |
354 } | |
355 | |
356 #else | |
357 | |
358 template <> | |
359 bool | |
7443 | 360 ascending_compare (double a, double b) |
7433 | 361 { |
362 return (xisnan (b) || (a < b)); | |
363 } | |
364 | |
365 template <> | |
366 bool | |
8651
ea8e65ca234f
reduce memory usage in sorting
Jaroslav Hajek <highegg@gmail.com>
parents:
8377
diff
changeset
|
367 ascending_compare (const double *a, |
ea8e65ca234f
reduce memory usage in sorting
Jaroslav Hajek <highegg@gmail.com>
parents:
8377
diff
changeset
|
368 const double *b) |
7433 | 369 { |
8651
ea8e65ca234f
reduce memory usage in sorting
Jaroslav Hajek <highegg@gmail.com>
parents:
8377
diff
changeset
|
370 return (xisnan (*b) || (*a < *b)); |
7433 | 371 } |
372 | |
373 template <> | |
374 bool | |
7443 | 375 descending_compare (double a, double b) |
7433 | 376 { |
377 return (xisnan (a) || (a > b)); | |
378 } | |
379 | |
380 template <> | |
381 bool | |
8651
ea8e65ca234f
reduce memory usage in sorting
Jaroslav Hajek <highegg@gmail.com>
parents:
8377
diff
changeset
|
382 descending_compare (const double *a, |
ea8e65ca234f
reduce memory usage in sorting
Jaroslav Hajek <highegg@gmail.com>
parents:
8377
diff
changeset
|
383 const double *b) |
7433 | 384 { |
8651
ea8e65ca234f
reduce memory usage in sorting
Jaroslav Hajek <highegg@gmail.com>
parents:
8377
diff
changeset
|
385 return (xisnan (*b) || (*a > *b)); |
7433 | 386 } |
387 | |
388 INSTANTIATE_ARRAY_SORT (double); | |
389 | |
390 #endif | |
757 | 391 |
6708 | 392 INSTANTIATE_ARRAY_AND_ASSIGN (double, OCTAVE_API); |
757 | 393 |
8290
7cbe01c21986
improve dense array indexing
Jaroslav Hajek <highegg@gmail.com>
parents:
7924
diff
changeset
|
394 INSTANTIATE_ARRAY_ASSIGN (double, int, OCTAVE_API) |
7cbe01c21986
improve dense array indexing
Jaroslav Hajek <highegg@gmail.com>
parents:
7924
diff
changeset
|
395 INSTANTIATE_ARRAY_ASSIGN (double, short, OCTAVE_API) |
7cbe01c21986
improve dense array indexing
Jaroslav Hajek <highegg@gmail.com>
parents:
7924
diff
changeset
|
396 INSTANTIATE_ARRAY_ASSIGN (double, char, OCTAVE_API) |
3836 | 397 |
1989 | 398 #include "Array2.h" |
399 | |
6153 | 400 template class OCTAVE_API Array2<double>; |
1989 | 401 |
3665 | 402 #include "ArrayN.h" |
403 #include "ArrayN.cc" | |
404 | |
6153 | 405 template class OCTAVE_API ArrayN<double>; |
4505 | 406 |
6153 | 407 template OCTAVE_API std::ostream& operator << (std::ostream&, const ArrayN<double>&); |
3665 | 408 |
1989 | 409 #include "DiagArray2.h" |
410 #include "DiagArray2.cc" | |
411 | |
6153 | 412 template class OCTAVE_API DiagArray2<double>; |
1989 | 413 |
757 | 414 /* |
415 ;;; Local Variables: *** | |
416 ;;; mode: C++ *** | |
417 ;;; End: *** | |
418 */ |