*** ./liboctave/dMatrix.cc.orig 2004-01-28 15:17:03.000000000 +0100 --- ./liboctave/dMatrix.cc 2004-01-28 14:41:24.000000000 +0100 *************** *** 789,798 **** nsamples = nc; } ! ComplexMatrix tmp (*this); ! Complex *in (tmp.fortran_vec ()); Complex *out (retval.fortran_vec ()); for (size_t i = 0; i < nsamples; i++) { OCTAVE_QUIT; --- 789,800 ---- nsamples = nc; } ! const double *in (fortran_vec ()); Complex *out (retval.fortran_vec ()); + // XXX FIXME XXX + // octave_fftw::fft (in, out, npts, nsamples); + // should be faster but isn't !!! for (size_t i = 0; i < nsamples; i++) { OCTAVE_QUIT; *************** *** 828,833 **** --- 830,838 ---- Complex *in (tmp.fortran_vec ()); Complex *out (retval.fortran_vec ()); + // XXX FIXME XXX + // octave_fftw::ifft (in, out, npts, nsamples); + // should be faster but isn't !!! for (size_t i = 0; i < nsamples; i++) { OCTAVE_QUIT; *************** *** 844,853 **** int nr = rows (); int nc = cols (); ! ComplexMatrix retval (*this); // Note the order of passing the rows and columns to account for // column-major storage. ! octave_fftw::fft2d (retval.fortran_vec (), nc, nr); return retval; } --- 849,859 ---- int nr = rows (); int nc = cols (); ! const double *in = fortran_vec (); ! ComplexMatrix retval (rows (), cols ()); // Note the order of passing the rows and columns to account for // column-major storage. ! octave_fftw::fft2d (in, retval.fortran_vec (), nc, nr); return retval; } *** ./liboctave/CMatrix.cc.orig 2004-01-28 15:17:09.000000000 +0100 --- ./liboctave/CMatrix.cc 2004-01-28 10:40:00.000000000 +0100 *************** *** 1128,1133 **** --- 1128,1136 ---- const Complex *in (data ()); Complex *out (retval.fortran_vec ()); + // XXX FIXME XXX + // octave_fftw::fft (in, out, npts, nsamples); + // should be faster but isn't !!! for (size_t i = 0; i < nsamples; i++) { OCTAVE_QUIT; *************** *** 1162,1167 **** --- 1165,1173 ---- const Complex *in (data ()); Complex *out (retval.fortran_vec ()); + // XXX FIXME XXX + // octave_fftw::ifft (in, out, npts, nsamples); + // should be faster but isn't !!! for (size_t i = 0; i < nsamples; i++) { OCTAVE_QUIT; *** ./liboctave/oct-fftw.h.orig 2004-01-28 15:17:17.000000000 +0100 --- ./liboctave/oct-fftw.h 2004-01-28 09:31:53.000000000 +0100 *************** *** 25,32 **** --- 25,34 ---- #if defined (HAVE_DFFTW_H) #include + #include #else #include + #include #endif #include "oct-cmplx.h" *************** *** 35,42 **** octave_fftw { public: ! static int fft (const Complex*, Complex *, size_t); ! static int ifft (const Complex*, Complex *, size_t); static int fft2d (Complex*, size_t, size_t); static int ifft2d (Complex*, size_t, size_t); --- 37,50 ---- octave_fftw { public: ! static int fft (const double *in, Complex *out, size_t npts, ! size_t nsamples = 1); ! static int fft (const Complex *in, Complex *out, size_t npts, ! size_t nsamples = 1); ! static int ifft (const Complex *in, Complex *out, size_t npts, ! size_t nsamples = 1); ! ! static int fft2d (const double*, Complex*, size_t, size_t); static int fft2d (Complex*, size_t, size_t); static int ifft2d (Complex*, size_t, size_t); *** ./liboctave/oct-fftw.cc.orig 2004-01-28 15:17:22.000000000 +0100 --- ./liboctave/oct-fftw.cc 2004-01-28 15:30:42.000000000 +0100 *************** *** 26,31 **** --- 26,32 ---- #include "oct-fftw.h" #include "lo-error.h" + #include // Helper class to create and cache fftw plans for both 1d and 2d. This *************** *** 44,49 **** --- 45,53 ---- fftw_plan create_plan (fftw_direction, size_t); fftwnd_plan create_plan2d (fftw_direction, size_t, size_t); + rfftw_plan create_rplan (fftw_direction, size_t); + rfftwnd_plan create_rplan2d (fftw_direction, size_t, size_t); + private: int plan_flags; *************** *** 52,57 **** --- 56,67 ---- size_t n[2]; size_t n2d[2][2]; + + rfftw_plan rplan[2]; + rfftwnd_plan rplan2d[2]; + + size_t rn[2]; + size_t rn2d[2][2]; }; octave_fftw_planner::octave_fftw_planner () *************** *** 63,68 **** --- 73,84 ---- n[0] = n[1] = 0; n2d[0][0] = n2d[0][1] = n2d[1][0] = n2d[1][1] = 0; + + rplan[0] = rplan[1] = 0; + rplan2d[0] = rplan2d[1] = 0; + + rn[0] = rn[1] = 0; + rn2d[0][0] = rn2d[0][1] = rn2d[1][0] = rn2d[1][1] = 0; } fftw_plan *************** *** 92,100 **** return *cur_plan_p; } fftwnd_plan octave_fftw_planner::create_plan2d (fftw_direction dir, ! size_t nrows, size_t ncols) { size_t which = (dir == FFTW_FORWARD) ? 0 : 1; fftwnd_plan *cur_plan_p = &plan2d[which]; --- 108,143 ---- return *cur_plan_p; } + rfftw_plan + octave_fftw_planner::create_rplan (fftw_direction dir, size_t npts) + { + size_t which = (dir == FFTW_FORWARD) ? 0 : 1; + rfftw_plan *cur_plan_p = &rplan[which]; + bool create_new_plan = false; + + if (rplan[which] == 0 || rn[which] != npts) + { + create_new_plan = true; + rn[which] = npts; + } + + if (create_new_plan) + { + if (*cur_plan_p) + rfftw_destroy_plan (*cur_plan_p); + + *cur_plan_p = rfftw_create_plan (npts, dir, plan_flags); + + if (*cur_plan_p == 0) + (*current_liboctave_error_handler) ("Error creating fftw plan"); + } + + return *cur_plan_p; + } + fftwnd_plan octave_fftw_planner::create_plan2d (fftw_direction dir, ! size_t nrows, size_t ncols) { size_t which = (dir == FFTW_FORWARD) ? 0 : 1; fftwnd_plan *cur_plan_p = &plan2d[which]; *************** *** 112,120 **** { if (*cur_plan_p) fftwnd_destroy_plan (*cur_plan_p); ! *cur_plan_p = fftw2d_create_plan (nrows, ncols, dir, plan_flags | FFTW_IN_PLACE); if (*cur_plan_p == 0) (*current_liboctave_error_handler) ("Error creating 2d fftw plan"); --- 155,194 ---- { if (*cur_plan_p) fftwnd_destroy_plan (*cur_plan_p); ! *cur_plan_p = fftw2d_create_plan (nrows, ncols, dir, plan_flags | FFTW_IN_PLACE); + if (*cur_plan_p == 0) + (*current_liboctave_error_handler) ("Error creating 2d fftw plan"); + } + + return *cur_plan_p; + } + + rfftwnd_plan + octave_fftw_planner::create_rplan2d (fftw_direction dir, + size_t nrows, size_t ncols) + { + size_t which = (dir == FFTW_FORWARD) ? 0 : 1; + fftwnd_plan *cur_plan_p = &rplan2d[which]; + bool create_new_plan = false; + + if (rplan2d[which] == 0 || rn2d[which][0] != nrows || + rn2d[which][1] != ncols) + { + create_new_plan = true; + + rn2d[which][0] = nrows; + rn2d[which][1] = ncols; + } + + if (create_new_plan) + { + if (*cur_plan_p) + rfftwnd_destroy_plan (*cur_plan_p); + + *cur_plan_p = rfftw2d_create_plan (nrows, ncols, dir, + plan_flags); if (*cur_plan_p == 0) (*current_liboctave_error_handler) ("Error creating 2d fftw plan"); *************** *** 125,154 **** static octave_fftw_planner fftw_planner; int ! octave_fftw::fft (const Complex *in, Complex *out, size_t npts) { ! fftw_one (fftw_planner.create_plan (FFTW_FORWARD, npts), ! reinterpret_cast (const_cast (in)), ! reinterpret_cast (out)); return 0; } int ! octave_fftw::ifft (const Complex *in, Complex *out, size_t npts) { ! fftw_one (fftw_planner.create_plan (FFTW_BACKWARD, npts), ! reinterpret_cast (const_cast (in)), ! reinterpret_cast (out)); const Complex scale = npts; ! for (size_t i = 0; i < npts; i++) out[i] /= scale; return 0; } int octave_fftw::fft2d (Complex *inout, size_t nr, size_t nc) { --- 199,321 ---- static octave_fftw_planner fftw_planner; + static inline void convert_halfcomplex (Complex *in, size_t npts, + size_t nsamples) + { + // This is likely to thrash about in the cache !!! + double *ptr1, *ptr2; + + for (size_t k = 0; k < nsamples; k++) + { + // First move the imaginary parts + ptr1 = (double *)in + 3; + ptr2 = (double *)in + 2 * (npts-1); + for (size_t i = 0; i < (npts-1)/2; i++) + { + *ptr1 = *ptr2; + *(ptr2+1) = - *ptr2; + ptr1 += 2; + ptr2 -= 2; + } + + // Now move the real parts + ptr1 = (double *)in + 2; + ptr2 = (double *)in + 2 * (npts-1); + for (size_t i = 0; i < npts/2; i++) + { + *ptr2 = *ptr1; + ptr1 += 2; + ptr2 -= 2; + } + + // Fix-up complex parts at zero and n/2+1 if n is even + ptr1 = (double *)in; + *(ptr1 + 1) = 0.; + if (!(npts & 1)) + *(ptr1 + npts + 1) = 0.; + + // Advance to the next data + in += npts; + } + } + + int + octave_fftw::fft (const double *in, Complex *out, size_t npts, size_t nsamples) + { + // Fool with the stride to make the conversion to complex easier + rfftw (fftw_planner.create_rplan (FFTW_FORWARD, npts), nsamples, + reinterpret_cast (const_cast (in)), 1, npts, + reinterpret_cast (out), 2, 2*npts); + + // Need to create other half of the transform + convert_halfcomplex (out, npts, nsamples); + + return 0; + } + int ! octave_fftw::fft (const Complex *in, Complex *out, size_t npts, ! size_t nsamples) { ! fftw (fftw_planner.create_plan (FFTW_FORWARD, npts), nsamples, ! reinterpret_cast (const_cast (in)), ! 1, npts, reinterpret_cast (out), 1, npts); return 0; } int ! octave_fftw::ifft (const Complex *in, Complex *out, size_t npts, ! size_t nsamples) { ! fftw (fftw_planner.create_plan (FFTW_BACKWARD, npts), nsamples, ! reinterpret_cast (const_cast (in)), ! 1, npts, reinterpret_cast (out), 1, npts); const Complex scale = npts; ! for (size_t i = 0; i < npts*nsamples; i++) out[i] /= scale; return 0; } + static inline void convert_packcomplex (Complex *out, size_t nr, size_t nc) + { + Complex *ptr1, *ptr2; + + // Create space for the missing elements + for (size_t i = 0; i < nr; i++) + { + ptr1 = out + i * (nc/2 + 1) + nr*((nc-1)/2); + ptr2 = out + i * nc; + for (size_t j = 0; j < nc/2+1; j++) + *ptr2++ = *ptr1++; + } + + // Fill in the missing data + for (size_t i = 1; i < nr; i++) + for (size_t j = nc/2+1; j < nc; j++) + out[j + i*nc] = conj(out[nc - j + (nr-i)*nc]); + for (size_t j = nc/2+1; j < nc; j++) + out[j] = conj(out[nc - j]); + } + + int + octave_fftw::fft2d (const double *in, Complex *out, size_t nr, size_t nc) + { + // Fool with the position of the start of the matrix, so that creating + // other half of the matrix won't cause cache problems + rfftwnd_one_real_to_complex (fftw_planner.create_rplan2d (FFTW_FORWARD, + nr, nc), + reinterpret_cast (const_cast(in)), + reinterpret_cast (out + nr*((nc-1)/2))); + + // Need to create other half of the transform + convert_packcomplex (out, nr, nc); + + return 0; + } + int octave_fftw::fft2d (Complex *inout, size_t nr, size_t nc) { *** ./configure.in.orig 2004-01-28 15:16:49.000000000 +0100 --- ./configure.in 2004-01-28 11:18:57.000000000 +0100 *************** *** 408,417 **** if test "$with_fftw" = "yes"; then have_fftw_header=no AC_CHECK_HEADERS(dfftw.h fftw.h, [have_fftw_header=yes; break]) ! if test "$have_fftw_header" = yes; then AC_CHECK_LIB(dfftw, fftw_create_plan, FFTW_LIBS="-ldfftw", [AC_CHECK_LIB(fftw, fftw_create_plan, FFTW_LIBS="-lfftw", with_fftw=no)]) else with_fftw=no fi --- 408,421 ---- if test "$with_fftw" = "yes"; then have_fftw_header=no + have_rfftw_header=no AC_CHECK_HEADERS(dfftw.h fftw.h, [have_fftw_header=yes; break]) ! AC_CHECK_HEADERS(drfftw.h rfftw.h, [have_rfftw_header=yes; break]) ! if test "$have_fftw_header" = yes && test "$have_rfftw_header" = yes; then AC_CHECK_LIB(dfftw, fftw_create_plan, FFTW_LIBS="-ldfftw", [AC_CHECK_LIB(fftw, fftw_create_plan, FFTW_LIBS="-lfftw", with_fftw=no)]) + AC_CHECK_LIB(drfftw, rfftw_create_plan, FFTW_LIBS="$FFTW_LIBS -ldrfftw", + [AC_CHECK_LIB(rfftw, rfftw_create_plan, FFTW_LIBS="$FFTW_LIBS -lrfftw", with_fftw=no, $FFTW_LIBS)], $FFTW_LIBS) else with_fftw=no fi