comparison liboctave/Array-f.cc @ 7789:82be108cc558

First attempt at single precision tyeps * * * corrections to qrupdate single precision routines * * * prefer demotion to single over promotion to double * * * Add single precision support to log2 function * * * Trivial PROJECT file update * * * Cache optimized hermitian/transpose methods * * * Add tests for tranpose/hermitian and ChangeLog entry for new transpose code
author David Bateman <dbateman@free.fr>
date Sun, 27 Apr 2008 22:34:17 +0200
parents
children 4976f66d469b
comparison
equal deleted inserted replaced
7788:45f5faba05a2 7789:82be108cc558
1 /*
2
3 Copyright (C) 1994, 1995, 1996, 1997, 1998, 2000, 2001, 2003, 2004,
4 2005, 2006, 2007 John W. Eaton
5
6 This file is part of Octave.
7
8 Octave is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published by the
10 Free Software Foundation; either version 3 of the License, or (at your
11 option) any later version.
12
13 Octave is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with Octave; see the file COPYING. If not, see
20 <http://www.gnu.org/licenses/>.
21
22 */
23
24 #ifdef HAVE_CONFIG_H
25 #include <config.h>
26 #endif
27
28 // Instantiate Arrays of float values.
29
30 #include "Array.h"
31 #include "Array.cc"
32 #include "oct-sort.cc"
33
34 #if defined (HAVE_IEEE754_DATA_FORMAT)
35
36 static inline uint32_t
37 FloatFlip (uint32_t f)
38 {
39 uint32_t mask
40 = -static_cast<int32_t>(f >> 31) | 0x80000000UL;
41
42 return f ^ mask;
43 }
44
45 static inline uint32_t
46 IFloatFlip (uint32_t f)
47 {
48 uint32_t mask = ((f >> 31) - 1) | 0x80000000UL;
49
50 return f ^ mask;
51 }
52
53 template <>
54 bool
55 ascending_compare (float a, float b)
56 {
57 return (xisnan (b) || (a < b));
58 }
59
60 template <>
61 bool
62 ascending_compare (vec_index<float> *a, vec_index<float> *b)
63 {
64 return (xisnan (b->vec) || (a->vec < b->vec));
65 }
66
67 template <>
68 bool
69 descending_compare (float a, float b)
70 {
71 return (xisnan (a) || (a > b));
72 }
73
74 template <>
75 bool
76 descending_compare (vec_index<float> *a, vec_index<float> *b)
77 {
78 return (xisnan (b->vec) || (a->vec > b->vec));
79 }
80
81 INSTANTIATE_ARRAY_SORT (uint32_t);
82
83 template <>
84 Array<float>
85 Array<float>::sort (octave_idx_type dim, sortmode mode) const
86 {
87 Array<float> m = *this;
88
89 dim_vector dv = m.dims ();
90
91 if (m.length () < 1)
92 return m;
93
94 octave_idx_type ns = dv(dim);
95 octave_idx_type iter = dv.numel () / ns;
96 octave_idx_type stride = 1;
97 for (int i = 0; i < dim; i++)
98 stride *= dv(i);
99
100 float *v = m.fortran_vec ();
101
102 uint32_t *p = reinterpret_cast<uint32_t *> (v);
103
104 octave_sort<uint32_t> lsort;
105
106 if (mode == ASCENDING)
107 lsort.set_compare (ascending_compare);
108 else if (mode == DESCENDING)
109 lsort.set_compare (descending_compare);
110 else
111 abort ();
112
113 if (stride == 1)
114 {
115 for (octave_idx_type j = 0; j < iter; j++)
116 {
117 // Flip the data in the vector so that int compares on
118 // IEEE754 give the correct ordering.
119
120 for (octave_idx_type i = 0; i < ns; i++)
121 p[i] = FloatFlip (p[i]);
122
123 lsort.sort (p, ns);
124
125 // Flip the data out of the vector so that int compares
126 // on IEEE754 give the correct ordering.
127
128 for (octave_idx_type i = 0; i < ns; i++)
129 p[i] = IFloatFlip (p[i]);
130
131 // There are two representations of NaN. One will be
132 // sorted to the beginning of the vector and the other
133 // to the end. If it will be sorted incorrectly, fix
134 // things up.
135
136 if (lo_ieee_signbit (octave_Float_NaN))
137 {
138 if (mode == ASCENDING)
139 {
140 octave_idx_type i = 0;
141 float *vtmp = reinterpret_cast<float *> (p);
142 while (xisnan (vtmp[i++]) && i < ns);
143 for (octave_idx_type l = 0; l < ns - i + 1; l++)
144 vtmp[l] = vtmp[l+i-1];
145 for (octave_idx_type l = ns - i + 1; l < ns; l++)
146 vtmp[l] = octave_Float_NaN;
147 }
148 else
149 {
150 octave_idx_type i = ns;
151 float *vtmp = reinterpret_cast<float *> (p);
152 while (xisnan (vtmp[--i]) && i > 0);
153 for (octave_idx_type l = i; l >= 0; l--)
154 vtmp[l-i+ns-1] = vtmp[l];
155 for (octave_idx_type l = 0; l < ns - i - 1; l++)
156 vtmp[l] = octave_Float_NaN;
157 }
158 }
159
160 p += ns;
161 }
162 }
163 else
164 {
165 OCTAVE_LOCAL_BUFFER (uint32_t, vi, ns);
166
167 for (octave_idx_type j = 0; j < iter; j++)
168 {
169 octave_idx_type offset = j;
170 octave_idx_type offset2 = 0;
171 while (offset >= stride)
172 {
173 offset -= stride;
174 offset2++;
175 }
176 offset += offset2 * stride * ns;
177
178 // Flip the data in the vector so that int compares on
179 // IEEE754 give the correct ordering.
180
181 for (octave_idx_type i = 0; i < ns; i++)
182 vi[i] = FloatFlip (p[i*stride + offset]);
183
184 lsort.sort (vi, ns);
185
186 // Flip the data out of the vector so that int compares
187 // on IEEE754 give the correct ordering.
188
189 for (octave_idx_type i = 0; i < ns; i++)
190 p[i*stride + offset] = IFloatFlip (vi[i]);
191
192 // There are two representations of NaN. One will be
193 // sorted to the beginning of the vector and the other
194 // to the end. If it will be sorted to the beginning,
195 // fix things up.
196
197 if (lo_ieee_signbit (octave_Float_NaN))
198 {
199 if (mode == ASCENDING)
200 {
201 octave_idx_type i = 0;
202 while (xisnan (v[i++*stride + offset]) && i < ns);
203 for (octave_idx_type l = 0; l < ns - i + 1; l++)
204 v[l*stride + offset] = v[(l+i-1)*stride + offset];
205 for (octave_idx_type l = ns - i + 1; l < ns; l++)
206 v[l*stride + offset] = octave_Float_NaN;
207 }
208 else
209 {
210 octave_idx_type i = ns;
211 while (xisnan (v[--i*stride + offset]) && i > 0);
212 for (octave_idx_type l = i; l >= 0; l--)
213 v[(l-i+ns-1)*stride + offset] = v[l*stride + offset];
214 for (octave_idx_type l = 0; l < ns - i - 1; l++)
215 v[l*stride + offset] = octave_Float_NaN;
216 }
217 }
218 }
219 }
220
221 return m;
222 }
223
224 template <>
225 Array<float>
226 Array<float>::sort (Array<octave_idx_type> &sidx, octave_idx_type dim,
227 sortmode mode) const
228 {
229 Array<float> m = *this;
230
231 dim_vector dv = m.dims ();
232
233 if (m.length () < 1)
234 {
235 sidx = Array<octave_idx_type> (dv);
236 return m;
237 }
238
239 octave_idx_type ns = dv(dim);
240 octave_idx_type iter = dv.numel () / ns;
241 octave_idx_type stride = 1;
242 for (int i = 0; i < dim; i++)
243 stride *= dv(i);
244
245 float *v = m.fortran_vec ();
246
247 uint32_t *p = reinterpret_cast<uint32_t *> (v);
248
249 octave_sort<vec_index<uint32_t> *> indexed_sort;
250
251 if (mode == ASCENDING)
252 indexed_sort.set_compare (ascending_compare);
253 else if (mode == DESCENDING)
254 indexed_sort.set_compare (descending_compare);
255 else
256 abort ();
257
258 OCTAVE_LOCAL_BUFFER (vec_index<uint32_t> *, vi, ns);
259 OCTAVE_LOCAL_BUFFER (vec_index<uint32_t>, vix, ns);
260
261 for (octave_idx_type i = 0; i < ns; i++)
262 vi[i] = &vix[i];
263
264 sidx = Array<octave_idx_type> (dv);
265
266 for (octave_idx_type j = 0; j < iter; j++)
267 {
268 octave_idx_type offset = j;
269 octave_idx_type offset2 = 0;
270 while (offset >= stride)
271 {
272 offset -= stride;
273 offset2++;
274 }
275 offset += offset2 * stride * ns;
276
277 // Flip the data in the vector so that int compares on
278 // IEEE754 give the correct ordering.
279
280 for (octave_idx_type i = 0; i < ns; i++)
281 {
282 vi[i]->vec = FloatFlip (p[i*stride + offset]);
283 vi[i]->indx = i;
284 }
285
286 indexed_sort.sort (vi, ns);
287
288 // Flip the data out of the vector so that int compares on
289 // IEEE754 give the correct ordering
290
291 for (octave_idx_type i = 0; i < ns; i++)
292 {
293 p[i*stride + offset] = IFloatFlip (vi[i]->vec);
294 sidx(i*stride + offset) = vi[i]->indx;
295 }
296
297 // There are two representations of NaN. One will be sorted
298 // to the beginning of the vector and the other to the end.
299 // If it will be sorted to the beginning, fix things up.
300
301 if (lo_ieee_signbit (octave_Float_NaN))
302 {
303 if (mode == ASCENDING)
304 {
305 octave_idx_type i = 0;
306 while (xisnan (v[i++*stride+offset]) && i < ns);
307 OCTAVE_LOCAL_BUFFER (float, itmp, i - 1);
308 for (octave_idx_type l = 0; l < i -1; l++)
309 itmp[l] = sidx(l*stride + offset);
310 for (octave_idx_type l = 0; l < ns - i + 1; l++)
311 {
312 v[l*stride + offset] = v[(l+i-1)*stride + offset];
313 sidx(l*stride + offset) = sidx((l+i-1)*stride + offset);
314 }
315 for (octave_idx_type k = 0, l = ns - i + 1; l < ns; l++, k++)
316 {
317 v[l*stride + offset] = octave_Float_NaN;
318 sidx(l*stride + offset) =
319 static_cast<octave_idx_type>(itmp[k]);
320 }
321 }
322 else
323 {
324 octave_idx_type i = ns;
325 while (xisnan (v[--i*stride+offset]) && i > 0);
326 OCTAVE_LOCAL_BUFFER (float, itmp, ns - i - 1);
327 for (octave_idx_type l = 0; l < ns - i -1; l++)
328 itmp[l] = sidx((l+i+1)*stride + offset);
329 for (octave_idx_type l = i; l >= 0; l--)
330 {
331 v[(l-i+ns-1)*stride + offset] = v[l*stride + offset];
332 sidx((l-i+ns-1)*stride + offset) = sidx(l*stride + offset);
333 }
334 for (octave_idx_type k = 0, l = 0; l < ns - i - 1; l++, k++)
335 {
336 v[l*stride + offset] = octave_Float_NaN;
337 sidx(l*stride + offset) =
338 static_cast<octave_idx_type>(itmp[k]);
339 }
340 }
341 }
342 }
343
344 return m;
345 }
346
347 #else
348
349 template <>
350 bool
351 ascending_compare (float a, float b)
352 {
353 return (xisnan (b) || (a < b));
354 }
355
356 template <>
357 bool
358 ascending_compare (vec_index<float> *a,
359 vec_index<float> *b)
360 {
361 return (xisnan (b->vec) || (a->vec < b->vec));
362 }
363
364 template <>
365 bool
366 descending_compare (float a, float b)
367 {
368 return (xisnan (a) || (a > b));
369 }
370
371 template <>
372 bool
373 descending_compare (vec_index<float> *a,
374 vec_index<float> *b)
375 {
376 return (xisnan (b->vec) || (a->vec > b->vec));
377 }
378
379 INSTANTIATE_ARRAY_SORT (float);
380
381 #endif
382
383 INSTANTIATE_ARRAY_AND_ASSIGN (float, OCTAVE_API);
384
385 INSTANTIATE_ARRAY_ASSIGN (float, int, OCTAVE_API);
386 INSTANTIATE_ARRAY_ASSIGN (float, short, OCTAVE_API);
387 INSTANTIATE_ARRAY_ASSIGN (float, char, OCTAVE_API);
388
389 #include "Array2.h"
390
391 template class OCTAVE_API Array2<float>;
392
393 #include "ArrayN.h"
394 #include "ArrayN.cc"
395
396 template class OCTAVE_API ArrayN<float>;
397
398 template OCTAVE_API std::ostream& operator << (std::ostream&, const ArrayN<float>&);
399
400 #include "DiagArray2.h"
401 #include "DiagArray2.cc"
402
403 template class OCTAVE_API DiagArray2<float>;
404
405 /*
406 ;;; Local Variables: ***
407 ;;; mode: C++ ***
408 ;;; End: ***
409 */