Logo Search packages:      
Sourcecode: p4fftwgel version File versions  Download package

executor.c

/*
 * Copyright (c) 1997-1999 Massachusetts Institute of Technology
 * Copyright (c) 2000-2001 Stefan Kral
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 */

/*
 * executor.c -- execute the fft
 */

#include <fftw-int.h>
#include <stdio.h>
#include <stdlib.h>

const char *fftw_version = "P4/FFTW-GEL V1.2 (2001/12/17)";

/*
 * The function fftw_strided_copy is called in other files, 
 * so we cannot declare it static. 
 */

#ifdef FFTW_ENABLE_FLOAT

void fftw_strided_copy(int n, 
                   fftw_complex *in_c, 
                   int ostride,
                   fftw_complex *out_c)
{
  double *in = (double *) in_c, *out = (double *) out_c;

  while (n&3) {
    *out = *in;
    in++;
    out += ostride;
    n--;
  }

  while (n != 0) {
    out[0*ostride] = in[0];
    out[1*ostride] = in[1];
    out[2*ostride] = in[2];
    out[3*ostride] = in[3];
    in += 4;
    out += 4*ostride;
    n -= 4;
  }
}

#else

void fftw_strided_copy(int n, fftw_complex *in, int ostride,
                   fftw_complex *out)
{
     int i;
     fftw_real r0, r1, i0, i1;
     fftw_real r2, r3, i2, i3;

     i = 0;

     for (; i < (n & 3); ++i) {
        out[i * ostride] = in[i];
     }

     for (; i < n; i += 4) {
        r0 = c_re(in[i]);
        i0 = c_im(in[i]);
        r1 = c_re(in[i + 1]);
        i1 = c_im(in[i + 1]);
        r2 = c_re(in[i + 2]);
        i2 = c_im(in[i + 2]);
        r3 = c_re(in[i + 3]);
        i3 = c_im(in[i + 3]);
        c_re(out[i * ostride]) = r0;
        c_im(out[i * ostride]) = i0;
        c_re(out[(i + 1) * ostride]) = r1;
        c_im(out[(i + 1) * ostride]) = i1;
        c_re(out[(i + 2) * ostride]) = r2;
        c_im(out[(i + 2) * ostride]) = i2;
        c_re(out[(i + 3) * ostride]) = r3;
        c_im(out[(i + 3) * ostride]) = i3;
     }
}

#endif


static void executor_many(int n, const fftw_complex *in,
                    fftw_complex *out,
                    fftw_plan_node *p,
                    int istride,
                    int ostride,
                    int howmany, int idist, int odist,
                    fftw_recurse_kind recurse_kind)
{
     int s;

     switch (p->type) {
       case FFTW_NOTW:
            {
               fftw_notw_codelet *codelet = p->nodeu.notw.codelet;

               HACK_ALIGN_STACK_ODD;
               for (s = 0; s < howmany; ++s)
                  codelet(in + s * idist,
                        out + s * odist,
                        istride, ostride);
               break;
            }

       default:
            for (s = 0; s < howmany; ++s)
               fftw_executor_simple(n, in + s * idist,
                              out + s * odist,
                              p, istride, ostride,
                              recurse_kind);
     }
}

#ifdef FFTW_ENABLE_VECTOR_RECURSE

/* executor_many_vector is like executor_many, but it pushes the
   howmany loop down to the leaves of the transform: */
static void executor_many_vector(int n, const fftw_complex *in,
                         fftw_complex *out,
                         fftw_plan_node *p,
                         int istride,
                         int ostride,
                         int howmany, int idist, int odist)
{
     int s;

     switch (p->type) {
       case FFTW_NOTW:
            {
               fftw_notw_codelet *codelet = p->nodeu.notw.codelet;

               HACK_ALIGN_STACK_ODD;
               for (s = 0; s < howmany; ++s)
                  codelet(in + s * idist,
                        out + s * odist,
                        istride, ostride);
               break;
            }

       case FFTW_TWIDDLE:
            {
               int r = p->nodeu.twiddle.size;
               int m = n / r;
               fftw_twiddle_codelet *codelet;
               fftw_complex *W;

               for (s = 0; s < r; ++s)
                  executor_many_vector(m, in + s * istride, 
                                   out + s * (m * ostride),
                                   p->nodeu.twiddle.recurse,
                                   istride * r, ostride,
                                   howmany, idist, odist);

               codelet = p->nodeu.twiddle.codelet;
               W = p->nodeu.twiddle.tw->twarray;

               /* This may not be the right thing.  We maybe should have
                  the howmany loop for the twiddle codelets at the
                  topmost level of the recursion, since odist is big;
                  i.e. separate recursions for twiddle and notwiddle. */
               HACK_ALIGN_STACK_EVEN;
               for (s = 0; s < howmany; ++s)
                  codelet(out + s * odist, W, m * ostride, m, ostride);

               break;
            }

       case FFTW_GENERIC:
            {
               int r = p->nodeu.generic.size;
               int m = n / r;
               fftw_generic_codelet *codelet;
               fftw_complex *W;

               for (s = 0; s < r; ++s)
                  executor_many_vector(m, in + s * istride, 
                                   out + s * (m * ostride),
                                   p->nodeu.generic.recurse,
                                   istride * r, ostride,
                                   howmany, idist, odist);

               codelet = p->nodeu.generic.codelet;
               W = p->nodeu.generic.tw->twarray;
               for (s = 0; s < howmany; ++s)
                  codelet(out + s * odist, W, m, r, n, ostride);

               break;
            }

       case FFTW_RADER:
            {
               int r = p->nodeu.rader.size;
               int m = n / r;
               fftw_rader_codelet *codelet;
               fftw_complex *W;

               for (s = 0; s < r; ++s)
                  executor_many_vector(m, in + s * istride, 
                                   out + s * (m * ostride),
                                   p->nodeu.rader.recurse,
                                   istride * r, ostride,
                                   howmany, idist, odist);

               codelet = p->nodeu.rader.codelet;
               W = p->nodeu.rader.tw->twarray;
               for (s = 0; s < howmany; ++s)
                  codelet(out + s * odist, W, m, r, ostride,
                        p->nodeu.rader.rader_data);

               break;
            }

       default:
            fftw_die("BUG in executor: invalid plan\n");
            break;
     }     
}

#endif /* FFTW_ENABLE_VECTOR_RECURSE */

/*
 * Do *not* declare simple executor static--we need to call it
 * from other files...also, preface its name with "fftw_"
 * to avoid any possible name collisions. 
 */
void fftw_executor_simple(int n, const fftw_complex *in,
                    fftw_complex *out,
                    fftw_plan_node *p,
                    int istride,
                    int ostride,
                    fftw_recurse_kind recurse_kind)
{
     switch (p->type) {
       case FFTW_NOTW:
            HACK_ALIGN_STACK_ODD;
            (p->nodeu.notw.codelet)(in, out, istride, ostride);
            break;

       case FFTW_TWIDDLE:
            {
               int r = p->nodeu.twiddle.size;
               int m = n / r;
               fftw_twiddle_codelet *codelet;
               fftw_complex *W;

#ifdef FFTW_ENABLE_VECTOR_RECURSE
               if (recurse_kind == FFTW_NORMAL_RECURSE)
#endif
                  executor_many(m, in, out,
                              p->nodeu.twiddle.recurse,
                              istride * r, ostride,
                              r, istride, m * ostride,
                              FFTW_NORMAL_RECURSE);
#ifdef FFTW_ENABLE_VECTOR_RECURSE
               else
                  executor_many_vector(m, in, out,
                                   p->nodeu.twiddle.recurse,
                                   istride * r, ostride,
                                   r, istride, m * ostride);
#endif

               codelet = p->nodeu.twiddle.codelet;
               W = p->nodeu.twiddle.tw->twarray;

               HACK_ALIGN_STACK_EVEN;
               codelet(out, W, m * ostride, m, ostride);

               break;
            }

       case FFTW_GENERIC:
            {
               int r = p->nodeu.generic.size;
               int m = n / r;
               fftw_generic_codelet *codelet;
               fftw_complex *W;

#ifdef FFTW_ENABLE_VECTOR_RECURSE
               if (recurse_kind == FFTW_NORMAL_RECURSE)
#endif
                  executor_many(m, in, out,
                              p->nodeu.generic.recurse,
                              istride * r, ostride,
                              r, istride, m * ostride,
                                      FFTW_NORMAL_RECURSE);
#ifdef FFTW_ENABLE_VECTOR_RECURSE
               else
                  executor_many_vector(m, in, out,
                                   p->nodeu.generic.recurse,
                                   istride * r, ostride,
                                   r, istride, m * ostride);
#endif

               codelet = p->nodeu.generic.codelet;
               W = p->nodeu.generic.tw->twarray;
               codelet(out, W, m, r, n, ostride);

               break;
            }

       case FFTW_RADER:
            {
               int r = p->nodeu.rader.size;
               int m = n / r;
               fftw_rader_codelet *codelet;
               fftw_complex *W;

#ifdef FFTW_ENABLE_VECTOR_RECURSE
               if (recurse_kind == FFTW_NORMAL_RECURSE)
#endif
                  executor_many(m, in, out,
                              p->nodeu.rader.recurse,
                              istride * r, ostride,
                              r, istride, m * ostride,
                                      FFTW_NORMAL_RECURSE);
#ifdef FFTW_ENABLE_VECTOR_RECURSE
               else
                  executor_many_vector(m, in, out,
                                   p->nodeu.rader.recurse,
                                   istride * r, ostride,
                                   r, istride, m * ostride);
#endif

               codelet = p->nodeu.rader.codelet;
               W = p->nodeu.rader.tw->twarray;
               codelet(out, W, m, r, ostride,
                     p->nodeu.rader.rader_data);

               break;
            }

       default:
            fftw_die("BUG in executor: invalid plan\n");
            break;
     }
}

static void executor_simple_inplace(int n, fftw_complex *in,
                            fftw_complex *out,
                            fftw_plan_node *p,
                            int istride,
                            fftw_recurse_kind recurse_kind)
{
     switch (p->type) {
       case FFTW_NOTW:
            HACK_ALIGN_STACK_ODD;
            (p->nodeu.notw.codelet)(in, in, istride, istride);
            break;

       default:
            {
               fftw_complex *tmp;

               if (out)
                  tmp = out;
               else
                  tmp = (fftw_complex *)
                      fftw_malloc(n * sizeof(fftw_complex));

               fftw_executor_simple(n, in, tmp, p, istride, 1,
                              recurse_kind);
               fftw_strided_copy(n, tmp, istride, in);

               if (!out)
                  fftw_free(tmp);
            }
     }
}

static void executor_many_inplace(int n, fftw_complex *in,
                          fftw_complex *out,
                          fftw_plan_node *p,
                          int istride,
                          int howmany, int idist,
                          fftw_recurse_kind recurse_kind)
{
     switch (p->type) {
       case FFTW_NOTW:
            {
               fftw_notw_codelet *codelet = p->nodeu.notw.codelet;
               int s;

               HACK_ALIGN_STACK_ODD;
               for (s = 0; s < howmany; ++s)
                  codelet(in + s * idist,
                        in + s * idist,
                        istride, istride);
               break;
            }

       default:
            {
               int s;
               fftw_complex *tmp;
               if (out)
                  tmp = out;
               else
                  tmp = (fftw_complex *)
                      fftw_malloc(n * sizeof(fftw_complex));

               for (s = 0; s < howmany; ++s) {
                  fftw_executor_simple(n,
                                   in + s * idist,
                                   tmp,
                                   p, istride, 1, recurse_kind);
                  fftw_strided_copy(n, tmp, istride, in + s * idist);
               }

               if (!out)
                  fftw_free(tmp);
            }
     }
}

/* user interface */
void fftw(fftw_plan plan, int howmany, fftw_complex *in, int istride,
        int idist, fftw_complex *out, int ostride, int odist)
{
     int n = plan->n;

     if (plan->flags & FFTW_IN_PLACE) {
        if (howmany == 1) {
             executor_simple_inplace(n, in, out, plan->root, istride,
                               plan->recurse_kind);
        } else {
             executor_many_inplace(n, in, out, plan->root, istride, howmany,
                             idist, plan->recurse_kind);
        }
     } else {
        if (howmany == 1) {
             fftw_executor_simple(n, in, out, plan->root, istride, ostride,
                            plan->recurse_kind);
        } else {
#ifdef FFTW_ENABLE_VECTOR_RECURSE
             int vector_size = plan->vector_size;
             if (vector_size <= 1)
#endif
                executor_many(n, in, out, plan->root, istride, ostride,
                          howmany, idist, odist, plan->recurse_kind);
#ifdef FFTW_ENABLE_VECTOR_RECURSE
             else {
                int s;
                int num_vects = howmany / vector_size;
                fftw_plan_node *root = plan->root;

                for (s = 0; s < num_vects; ++s)
                   executor_many_vector(n, 
                                   in + s * (vector_size * idist), 
                                   out + s * (vector_size * odist),
                                   root,
                                   istride, ostride,
                                   vector_size, idist, odist);

                s = howmany % vector_size;
                if (s > 0)
                   executor_many(n,
                               in + num_vects * (vector_size * idist), 
                               out + num_vects * (vector_size * odist),
                               root,
                               istride, ostride,
                               s, idist, odist, 
                               FFTW_NORMAL_RECURSE);
             }
#endif
        }
     }
}

void fftw_one(fftw_plan plan, fftw_complex *in, fftw_complex *out)
{
     int n = plan->n;

     if (plan->flags & FFTW_IN_PLACE)
        executor_simple_inplace(n, in, out, plan->root, 1,
                          plan->recurse_kind);
     else
        fftw_executor_simple(n, in, out, plan->root, 1, 1,
                         plan->recurse_kind);
}

Generated by  Doxygen 1.6.0   Back to index