Mercurial > hg > octave-lyh
comparison liboctave/dNDArray.cc @ 7789:82be108cc558
First attempt at single precision tyeps
* * *
corrections to qrupdate single precision routines
* * *
prefer demotion to single over promotion to double
* * *
Add single precision support to log2 function
* * *
Trivial PROJECT file update
* * *
Cache optimized hermitian/transpose methods
* * *
Add tests for tranpose/hermitian and ChangeLog entry for new transpose code
author | David Bateman <dbateman@free.fr> |
---|---|
date | Sun, 27 Apr 2008 22:34:17 +0200 |
parents | 36594d5bbe13 |
children | 9d080df0c843 |
comparison
equal
deleted
inserted
replaced
7788:45f5faba05a2 | 7789:82be108cc558 |
---|---|
180 // double complex arguments. They have been modified by adding an | 180 // double complex arguments. They have been modified by adding an |
181 // implicit double precision (a-h,o-z) statement at the beginning of | 181 // implicit double precision (a-h,o-z) statement at the beginning of |
182 // each subroutine. | 182 // each subroutine. |
183 | 183 |
184 F77_RET_T | 184 F77_RET_T |
185 F77_FUNC (cffti, CFFTI) (const octave_idx_type&, Complex*); | 185 F77_FUNC (zffti, ZFFTI) (const octave_idx_type&, Complex*); |
186 | 186 |
187 F77_RET_T | 187 F77_RET_T |
188 F77_FUNC (cfftf, CFFTF) (const octave_idx_type&, Complex*, Complex*); | 188 F77_FUNC (zfftf, ZFFTF) (const octave_idx_type&, Complex*, Complex*); |
189 | 189 |
190 F77_RET_T | 190 F77_RET_T |
191 F77_FUNC (cfftb, CFFTB) (const octave_idx_type&, Complex*, Complex*); | 191 F77_FUNC (zfftb, ZFFTB) (const octave_idx_type&, Complex*, Complex*); |
192 } | 192 } |
193 | 193 |
194 ComplexNDArray | 194 ComplexNDArray |
195 NDArray::fourier (int dim) const | 195 NDArray::fourier (int dim) const |
196 { | 196 { |
215 octave_idx_type howmany = numel () / npts; | 215 octave_idx_type howmany = numel () / npts; |
216 howmany = (stride == 1 ? howmany : (howmany > stride ? stride : howmany)); | 216 howmany = (stride == 1 ? howmany : (howmany > stride ? stride : howmany)); |
217 octave_idx_type nloop = (stride == 1 ? 1 : numel () / npts / stride); | 217 octave_idx_type nloop = (stride == 1 ? 1 : numel () / npts / stride); |
218 octave_idx_type dist = (stride == 1 ? npts : 1); | 218 octave_idx_type dist = (stride == 1 ? npts : 1); |
219 | 219 |
220 F77_FUNC (cffti, CFFTI) (npts, pwsave); | 220 F77_FUNC (zffti, ZFFTI) (npts, pwsave); |
221 | 221 |
222 for (octave_idx_type k = 0; k < nloop; k++) | 222 for (octave_idx_type k = 0; k < nloop; k++) |
223 { | 223 { |
224 for (octave_idx_type j = 0; j < howmany; j++) | 224 for (octave_idx_type j = 0; j < howmany; j++) |
225 { | 225 { |
226 OCTAVE_QUIT; | 226 OCTAVE_QUIT; |
227 | 227 |
228 for (octave_idx_type i = 0; i < npts; i++) | 228 for (octave_idx_type i = 0; i < npts; i++) |
229 tmp[i] = elem((i + k*npts)*stride + j*dist); | 229 tmp[i] = elem((i + k*npts)*stride + j*dist); |
230 | 230 |
231 F77_FUNC (cfftf, CFFTF) (npts, tmp, pwsave); | 231 F77_FUNC (zfftf, ZFFTF) (npts, tmp, pwsave); |
232 | 232 |
233 for (octave_idx_type i = 0; i < npts; i++) | 233 for (octave_idx_type i = 0; i < npts; i++) |
234 retval ((i + k*npts)*stride + j*dist) = tmp[i]; | 234 retval ((i + k*npts)*stride + j*dist) = tmp[i]; |
235 } | 235 } |
236 } | 236 } |
262 octave_idx_type howmany = numel () / npts; | 262 octave_idx_type howmany = numel () / npts; |
263 howmany = (stride == 1 ? howmany : (howmany > stride ? stride : howmany)); | 263 howmany = (stride == 1 ? howmany : (howmany > stride ? stride : howmany)); |
264 octave_idx_type nloop = (stride == 1 ? 1 : numel () / npts / stride); | 264 octave_idx_type nloop = (stride == 1 ? 1 : numel () / npts / stride); |
265 octave_idx_type dist = (stride == 1 ? npts : 1); | 265 octave_idx_type dist = (stride == 1 ? npts : 1); |
266 | 266 |
267 F77_FUNC (cffti, CFFTI) (npts, pwsave); | 267 F77_FUNC (zffti, ZFFTI) (npts, pwsave); |
268 | 268 |
269 for (octave_idx_type k = 0; k < nloop; k++) | 269 for (octave_idx_type k = 0; k < nloop; k++) |
270 { | 270 { |
271 for (octave_idx_type j = 0; j < howmany; j++) | 271 for (octave_idx_type j = 0; j < howmany; j++) |
272 { | 272 { |
273 OCTAVE_QUIT; | 273 OCTAVE_QUIT; |
274 | 274 |
275 for (octave_idx_type i = 0; i < npts; i++) | 275 for (octave_idx_type i = 0; i < npts; i++) |
276 tmp[i] = elem((i + k*npts)*stride + j*dist); | 276 tmp[i] = elem((i + k*npts)*stride + j*dist); |
277 | 277 |
278 F77_FUNC (cfftb, CFFTB) (npts, tmp, pwsave); | 278 F77_FUNC (zfftb, ZFFTB) (npts, tmp, pwsave); |
279 | 279 |
280 for (octave_idx_type i = 0; i < npts; i++) | 280 for (octave_idx_type i = 0; i < npts; i++) |
281 retval ((i + k*npts)*stride + j*dist) = tmp[i] / | 281 retval ((i + k*npts)*stride + j*dist) = tmp[i] / |
282 static_cast<double> (npts); | 282 static_cast<double> (npts); |
283 } | 283 } |
308 howmany = (stride == 1 ? howmany : | 308 howmany = (stride == 1 ? howmany : |
309 (howmany > stride ? stride : howmany)); | 309 (howmany > stride ? stride : howmany)); |
310 octave_idx_type nloop = (stride == 1 ? 1 : numel () / npts / stride); | 310 octave_idx_type nloop = (stride == 1 ? 1 : numel () / npts / stride); |
311 octave_idx_type dist = (stride == 1 ? npts : 1); | 311 octave_idx_type dist = (stride == 1 ? npts : 1); |
312 | 312 |
313 F77_FUNC (cffti, CFFTI) (npts, pwsave); | 313 F77_FUNC (zffti, ZFFTI) (npts, pwsave); |
314 | 314 |
315 for (octave_idx_type k = 0; k < nloop; k++) | 315 for (octave_idx_type k = 0; k < nloop; k++) |
316 { | 316 { |
317 for (octave_idx_type j = 0; j < howmany; j++) | 317 for (octave_idx_type j = 0; j < howmany; j++) |
318 { | 318 { |
319 OCTAVE_QUIT; | 319 OCTAVE_QUIT; |
320 | 320 |
321 for (octave_idx_type l = 0; l < npts; l++) | 321 for (octave_idx_type l = 0; l < npts; l++) |
322 prow[l] = retval ((l + k*npts)*stride + j*dist); | 322 prow[l] = retval ((l + k*npts)*stride + j*dist); |
323 | 323 |
324 F77_FUNC (cfftf, CFFTF) (npts, prow, pwsave); | 324 F77_FUNC (zfftf, ZFFTF) (npts, prow, pwsave); |
325 | 325 |
326 for (octave_idx_type l = 0; l < npts; l++) | 326 for (octave_idx_type l = 0; l < npts; l++) |
327 retval ((l + k*npts)*stride + j*dist) = prow[l]; | 327 retval ((l + k*npts)*stride + j*dist) = prow[l]; |
328 } | 328 } |
329 } | 329 } |
356 howmany = (stride == 1 ? howmany : | 356 howmany = (stride == 1 ? howmany : |
357 (howmany > stride ? stride : howmany)); | 357 (howmany > stride ? stride : howmany)); |
358 octave_idx_type nloop = (stride == 1 ? 1 : numel () / npts / stride); | 358 octave_idx_type nloop = (stride == 1 ? 1 : numel () / npts / stride); |
359 octave_idx_type dist = (stride == 1 ? npts : 1); | 359 octave_idx_type dist = (stride == 1 ? npts : 1); |
360 | 360 |
361 F77_FUNC (cffti, CFFTI) (npts, pwsave); | 361 F77_FUNC (zffti, ZFFTI) (npts, pwsave); |
362 | 362 |
363 for (octave_idx_type k = 0; k < nloop; k++) | 363 for (octave_idx_type k = 0; k < nloop; k++) |
364 { | 364 { |
365 for (octave_idx_type j = 0; j < howmany; j++) | 365 for (octave_idx_type j = 0; j < howmany; j++) |
366 { | 366 { |
367 OCTAVE_QUIT; | 367 OCTAVE_QUIT; |
368 | 368 |
369 for (octave_idx_type l = 0; l < npts; l++) | 369 for (octave_idx_type l = 0; l < npts; l++) |
370 prow[l] = retval ((l + k*npts)*stride + j*dist); | 370 prow[l] = retval ((l + k*npts)*stride + j*dist); |
371 | 371 |
372 F77_FUNC (cfftb, CFFTB) (npts, prow, pwsave); | 372 F77_FUNC (zfftb, ZFFTB) (npts, prow, pwsave); |
373 | 373 |
374 for (octave_idx_type l = 0; l < npts; l++) | 374 for (octave_idx_type l = 0; l < npts; l++) |
375 retval ((l + k*npts)*stride + j*dist) = prow[l] / | 375 retval ((l + k*npts)*stride + j*dist) = prow[l] / |
376 static_cast<double> (npts); | 376 static_cast<double> (npts); |
377 } | 377 } |
404 howmany = (stride == 1 ? howmany : | 404 howmany = (stride == 1 ? howmany : |
405 (howmany > stride ? stride : howmany)); | 405 (howmany > stride ? stride : howmany)); |
406 octave_idx_type nloop = (stride == 1 ? 1 : numel () / npts / stride); | 406 octave_idx_type nloop = (stride == 1 ? 1 : numel () / npts / stride); |
407 octave_idx_type dist = (stride == 1 ? npts : 1); | 407 octave_idx_type dist = (stride == 1 ? npts : 1); |
408 | 408 |
409 F77_FUNC (cffti, CFFTI) (npts, pwsave); | 409 F77_FUNC (zffti, ZFFTI) (npts, pwsave); |
410 | 410 |
411 for (octave_idx_type k = 0; k < nloop; k++) | 411 for (octave_idx_type k = 0; k < nloop; k++) |
412 { | 412 { |
413 for (octave_idx_type j = 0; j < howmany; j++) | 413 for (octave_idx_type j = 0; j < howmany; j++) |
414 { | 414 { |
415 OCTAVE_QUIT; | 415 OCTAVE_QUIT; |
416 | 416 |
417 for (octave_idx_type l = 0; l < npts; l++) | 417 for (octave_idx_type l = 0; l < npts; l++) |
418 prow[l] = retval ((l + k*npts)*stride + j*dist); | 418 prow[l] = retval ((l + k*npts)*stride + j*dist); |
419 | 419 |
420 F77_FUNC (cfftf, CFFTF) (npts, prow, pwsave); | 420 F77_FUNC (zfftf, ZFFTF) (npts, prow, pwsave); |
421 | 421 |
422 for (octave_idx_type l = 0; l < npts; l++) | 422 for (octave_idx_type l = 0; l < npts; l++) |
423 retval ((l + k*npts)*stride + j*dist) = prow[l]; | 423 retval ((l + k*npts)*stride + j*dist) = prow[l]; |
424 } | 424 } |
425 } | 425 } |
451 howmany = (stride == 1 ? howmany : | 451 howmany = (stride == 1 ? howmany : |
452 (howmany > stride ? stride : howmany)); | 452 (howmany > stride ? stride : howmany)); |
453 octave_idx_type nloop = (stride == 1 ? 1 : numel () / npts / stride); | 453 octave_idx_type nloop = (stride == 1 ? 1 : numel () / npts / stride); |
454 octave_idx_type dist = (stride == 1 ? npts : 1); | 454 octave_idx_type dist = (stride == 1 ? npts : 1); |
455 | 455 |
456 F77_FUNC (cffti, CFFTI) (npts, pwsave); | 456 F77_FUNC (zffti, ZFFTI) (npts, pwsave); |
457 | 457 |
458 for (octave_idx_type k = 0; k < nloop; k++) | 458 for (octave_idx_type k = 0; k < nloop; k++) |
459 { | 459 { |
460 for (octave_idx_type j = 0; j < howmany; j++) | 460 for (octave_idx_type j = 0; j < howmany; j++) |
461 { | 461 { |
462 OCTAVE_QUIT; | 462 OCTAVE_QUIT; |
463 | 463 |
464 for (octave_idx_type l = 0; l < npts; l++) | 464 for (octave_idx_type l = 0; l < npts; l++) |
465 prow[l] = retval ((l + k*npts)*stride + j*dist); | 465 prow[l] = retval ((l + k*npts)*stride + j*dist); |
466 | 466 |
467 F77_FUNC (cfftb, CFFTB) (npts, prow, pwsave); | 467 F77_FUNC (zfftb, ZFFTB) (npts, prow, pwsave); |
468 | 468 |
469 for (octave_idx_type l = 0; l < npts; l++) | 469 for (octave_idx_type l = 0; l < npts; l++) |
470 retval ((l + k*npts)*stride + j*dist) = prow[l] / | 470 retval ((l + k*npts)*stride + j*dist) = prow[l] / |
471 static_cast<double> (npts); | 471 static_cast<double> (npts); |
472 } | 472 } |