Developer Reference for Intel® oneAPI Math Kernel Library for Fortran

ID 766686
Date 3/31/2025
Public
Document Table of Contents

Examples for Cluster FFT Functions

2D Out-of-place Cluster FFT Computation

The following C example computes a 2-dimensional out-of-place FFT using the cluster FFT interface:

/* C99 example */
#include <stdlib.h>
#include "mpi.h"
#include "mkl_cdft.h"
#include <math.h>

#ifndef M_PI
#define M_PI (3.14159265358979323846)
#endif
#define DBL_EPSILON 2.2204460492503131e-16

#define CHK_ERR_EQ(ERR_CODE, SUCCESS_CODE) do { \
  if (ERR_CODE != SUCCESS_CODE) \
    goto finish; \
} while(0)

#define CHK_MKL_ERR(MKL_ERR) CHK_ERR_EQ(MKL_ERR, DFTI_NO_ERROR)
#define CHK_MPI_ERR(MPI_ERR) CHK_ERR_EQ(MPI_ERR, MPI_SUCCESS)

int main(int argc, char* argv[])
{
  DFTI_DESCRIPTOR_DM_HANDLE desc = NULL;
  MKL_Complex16 *in = NULL, *out = NULL;
  MKL_LONG v, i, j, n, s, mkl_err;
  const MKL_LONG lengths[2] = {128, 64};
  const MKL_LONG h = 7, k = 6;
  MKL_Complex16 local_err;
  double phase, max_dft_err = 0.0;
  int mpi_err = MPI_Init(&argc, &argv); CHK_MPI_ERR(mpi_err);
  int exit_code = EXIT_FAILURE;

  /* Create descriptor for 2D FFT */
  mkl_err = DftiCreateDescriptorDM(MPI_COMM_WORLD, &desc, DFTI_DOUBLE,
                                   DFTI_COMPLEX, 2, lengths); CHK_MKL_ERR(mkl_err);
  /* Ask necessary length of in and out arrays and allocate memory */
  mkl_err = DftiGetValueDM(desc, CDFT_LOCAL_SIZE, &v); CHK_MKL_ERR(mkl_err);
  in = (MKL_Complex16*) malloc(v*sizeof(MKL_Complex16));
  out = (MKL_Complex16*) malloc(v*sizeof(MKL_Complex16));
  if (in == NULL || out == NULL) goto finish;
  /* Fill local array with initial data. Current process performs n rows,
     0th row of in corresponds to s-th row of virtual global array */
  mkl_err = DftiGetValueDM(desc, CDFT_LOCAL_NX, &n); CHK_MKL_ERR(mkl_err);
  mkl_err = DftiGetValueDM(desc, CDFT_LOCAL_X_START, &s); CHK_MKL_ERR(mkl_err);
  /* Virtual global array globalIN is such that (0<=i<n, 0<=j<lengths[1])
     globalIN[(i+s)*lengths[1]+j] := in[i*lengths[1]+j] */
  for (i = 0; i < n; ++i) {
    for (j = 0; j < lengths[1]; ++j) {
      phase = 2.0*M_PI*((double)((i+s)*h)/lengths[0] + (double)(j*k)/lengths[1]);
      in[i*lengths[1]+j].real = cos(phase) / (lengths[0]*lengths[1]);
      in[i*lengths[1]+j].imag = sin(phase) / (lengths[0]*lengths[1]);
    }
  }
  /* Configure for an out-of-place transform (default is DFTI_INPLACE) */
  mkl_err = DftiSetValueDM(desc, DFTI_PLACEMENT,
                           DFTI_NOT_INPLACE); CHK_MKL_ERR(mkl_err);
  /* Commit descriptor and compute DFT */
  mkl_err = DftiCommitDescriptorDM(desc); CHK_MKL_ERR(mkl_err);
  mkl_err = DftiComputeForwardDM(desc, in, out); CHK_MKL_ERR(mkl_err);
  /* Virtual global array globalOUT is such that (0<=i<n, 0<=j<lengths[1])
     globalOUT[(i+s)*lengths[1]+j] := out[i*lengths[1]+j] */
  for (i = 0; i < n; ++i) {
    for (j = 0; j < lengths[1]; ++j) {
      local_err = out[i*lengths[1] + j];
      if (i+s == h && j == k)
        local_err.real -= 1.0; // expected value
      max_dft_err = fmax(max_dft_err, hypot(local_err.real, local_err.imag));
    }
  }
  mpi_err = MPI_Allreduce(MPI_IN_PLACE, &max_dft_err, 1, MPI_DOUBLE, MPI_MAX,
                          MPI_COMM_WORLD); CHK_MPI_ERR(mpi_err);
  if (max_dft_err < 5.0*log2((double) lengths[0]*lengths[1])*DBL_EPSILON)
    exit_code = EXIT_SUCCESS;
finish:
  if (mkl_err != DFTI_NO_ERROR || mpi_err != MPI_SUCCESS)
    exit_code = EXIT_FAILURE;
  if (desc) mkl_err = DftiFreeDescriptorDM(&desc);
  if (in) free(in);
  if (out) free(out);
  mpi_err = MPI_Finalize();
  if (mkl_err != DFTI_NO_ERROR || mpi_err != MPI_SUCCESS)
    exit_code = EXIT_FAILURE;
  return exit_code;
}

1D In-place Cluster FFT Computations

The C example below illustrates one-dimensional in-place cluster FFT computations effected with a user-defined workspace:

/* C99 example */
#include <stdlib.h>
#include "mpi.h"
#include "mkl_cdft.h"
#include <math.h>

#ifndef M_PI
#define M_PI (3.14159265358979323846)
#endif
#define DBL_EPSILON 2.2204460492503131e-16

#define CHK_ERR_EQ(ERR_CODE, SUCCESS_CODE) do { \
  if (ERR_CODE != SUCCESS_CODE) \
    goto finish; \
} while(0)

#define CHK_MKL_ERR(MKL_ERR) CHK_ERR_EQ(MKL_ERR, DFTI_NO_ERROR)
#define CHK_MPI_ERR(MPI_ERR) CHK_ERR_EQ(MPI_ERR, MPI_SUCCESS)

int main(int argc, char* argv[])
{
  DFTI_DESCRIPTOR_DM_HANDLE desc = NULL;
  MKL_Complex16 *data = NULL, *work = NULL;
  MKL_LONG v, i, n, n_out, s_out, s, mkl_err;
  const MKL_LONG h = 7;
  const MKL_LONG N = 512;
  MKL_Complex16 local_err;
  double max_dft_err = 0.0;
  int mpi_err = MPI_Init(&argc, &argv); CHK_MPI_ERR(mpi_err);
  int exit_code = EXIT_FAILURE;

  /* Create descriptor for 1D FFT */
  mkl_err = DftiCreateDescriptorDM(MPI_COMM_WORLD, &desc, DFTI_DOUBLE,
                                   DFTI_COMPLEX, 1, N); CHK_MKL_ERR(mkl_err);
  /* Ask necessary length of array and workspace and allocate memory */
  mkl_err = DftiGetValueDM(desc, CDFT_LOCAL_SIZE, &v);
  data = (MKL_Complex16*) malloc(v*sizeof(MKL_Complex16));
  work = (MKL_Complex16*) malloc(v*sizeof(MKL_Complex16));
  if (data == NULL || work == NULL) goto finish;
  /* Fill local array with initial data. Local array has n elements,
     0th element of data corresponds to s-th element of virtual global array */
  mkl_err = DftiGetValueDM(desc, CDFT_LOCAL_NX, &n); CHK_MKL_ERR(mkl_err);
  mkl_err = DftiGetValueDM(desc, CDFT_LOCAL_X_START, &s); CHK_MKL_ERR(mkl_err);
  /* Set work array as a workspace */
  mkl_err = DftiSetValueDM(desc, CDFT_WORKSPACE, work); CHK_MKL_ERR(mkl_err);
  /* Virtual global array globalIN is such that globalIN[s+i] := data[i],
     0 <= i < n  */
  for(i = 0; i < n; ++i) {
    data[i].real = cos(2.0*M_PI*(i+s)*h/N) / N;
    data[i].imag = sin(2.0*M_PI*(i+s)*h/N) / N;
  }
  /* Commit descriptor and compute DFT */
  mkl_err = DftiCommitDescriptorDM(desc); CHK_MKL_ERR(mkl_err);
  mkl_err = DftiComputeForwardDM(desc, data); CHK_MKL_ERR(mkl_err);
  mkl_err = DftiGetValueDM(desc, CDFT_LOCAL_OUT_NX, &n_out); CHK_MKL_ERR(mkl_err);
  mkl_err = DftiGetValueDM(desc, CDFT_LOCAL_OUT_X_START, &s_out); CHK_MKL_ERR(mkl_err);
  /* Virtual global array globalOUT is such that
     globalOUT[s_out + i] := data[i], 0 <= i < n_out */
  for (i = 0; i < n_out; ++i) {
    local_err = data[i];
    if (i+s == h)
      local_err.real -= 1.0; // expected value
    max_dft_err = fmax(max_dft_err, hypot(local_err.real, local_err.imag));
  }
  mpi_err = MPI_Allreduce(MPI_IN_PLACE, &max_dft_err, 1, MPI_DOUBLE, MPI_MAX,
                          MPI_COMM_WORLD); CHK_MPI_ERR(mpi_err);
  if (max_dft_err < 5.0*log2((double) N)*DBL_EPSILON)
    exit_code = EXIT_SUCCESS;

finish:
  if (mkl_err != DFTI_NO_ERROR || mpi_err != MPI_SUCCESS)
    exit_code = EXIT_FAILURE;
  if (desc) mkl_err = DftiFreeDescriptorDM(&desc);
  if (data) free(data);
  if (work) free(work);
  mpi_err = MPI_Finalize();
  if (mkl_err != DFTI_NO_ERROR || mpi_err != MPI_SUCCESS)
    exit_code = EXIT_FAILURE;
  return exit_code;
}