/* Copyright 2005-2014 NVIDIA Corporation. All rights reserved.
|
|
*
|
|
* NOTICE TO LICENSEE:
|
|
*
|
|
* The source code and/or documentation ("Licensed Deliverables") are
|
|
* subject to NVIDIA intellectual property rights under U.S. and
|
|
* international Copyright laws.
|
|
*
|
|
* The Licensed Deliverables contained herein are PROPRIETARY and
|
|
* CONFIDENTIAL to NVIDIA and are being provided under the terms and
|
|
* conditions of a form of NVIDIA software license agreement by and
|
|
* between NVIDIA and Licensee ("License Agreement") or electronically
|
|
* accepted by Licensee. Notwithstanding any terms or conditions to
|
|
* the contrary in the License Agreement, reproduction or disclosure
|
|
* of the Licensed Deliverables to any third party without the express
|
|
* written consent of NVIDIA is prohibited.
|
|
*
|
|
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
|
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
|
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. THEY ARE
|
|
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
|
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
|
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
|
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
|
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
|
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
|
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
|
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
|
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
|
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
|
* OF THESE LICENSED DELIVERABLES.
|
|
*
|
|
* U.S. Government End Users. These Licensed Deliverables are a
|
|
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
|
* 1995), consisting of "commercial computer software" and "commercial
|
|
* computer software documentation" as such terms are used in 48
|
|
* C.F.R. 12.212 (SEPT 1995) and are provided to the U.S. Government
|
|
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
|
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
|
* U.S. Government End Users acquire the Licensed Deliverables with
|
|
* only those rights set forth herein.
|
|
*
|
|
* Any use of the Licensed Deliverables in individual and commercial
|
|
* software must include, in the user documentation and internal
|
|
* comments to the code, the above Disclaimer and U.S. Government End
|
|
* Users Notice.
|
|
*/
|
|
|
|
/*!
|
|
* \file cufftw.h
|
|
* \brief Public header file for the NVIDIA CUDA FFTW library (CUFFTW)
|
|
*/
|
|
|
|
#ifndef _CUFFTW_H_
|
|
#define _CUFFTW_H_
|
|
|
|
|
|
#include <stddef.h>
#include <stdio.h>
#include "cufft.h"
|
|
|
|
#ifdef __cplusplus
|
|
extern "C" {
|
|
#endif
|
|
|
|
// Transform direction.
// The negative value is parenthesized so it always expands as one operand.
// FFTW_INVERSE and FFTW_BACKWARD share the same value (1).
#define FFTW_FORWARD  (-1)
#define FFTW_INVERSE  1
#define FFTW_BACKWARD 1
|
|
|
|
// Planner flags.
// NOTE(review): these are consecutive values, not independent bits
// (FFTW_PATIENT == (FFTW_ESTIMATE | FFTW_MEASURE)), so a bitwise test for one
// planner flag can also match another. Confirm how the library decodes
// `flags` before OR-ing planner flags together.
#define FFTW_ESTIMATE    0x01
#define FFTW_MEASURE     0x02
#define FFTW_PATIENT     0x03
#define FFTW_EXHAUSTIVE  0x04
#define FFTW_WISDOM_ONLY 0x05
|
|
|
|
// Algorithm restriction flags.
// NOTE(review): FFTW_PRESERVE_INPUT (0x0C) includes the FFTW_DESTROY_INPUT
// bit (0x08) plus 0x04, so `flags & FFTW_DESTROY_INPUT` is also non-zero when
// FFTW_PRESERVE_INPUT is passed. Confirm the intended decoding.
#define FFTW_DESTROY_INPUT  0x08
#define FFTW_PRESERVE_INPUT 0x0C
#define FFTW_UNALIGNED      0x10
|
|
|
|
// CUFFTW defines and supports the following data types.

// Complex element types.
// If <complex.h> was included before this header (so `_Complex_I` and
// `complex` are defined) and FFTW_NO_Complex is not defined, the C99 native
// complex types are used. Otherwise each complex value is a two-element array
// (presumably real part first, as in FFTW -- confirm).
#if !defined(FFTW_NO_Complex) && defined(_Complex_I) && defined(complex)
typedef double _Complex fftw_complex;
typedef float _Complex fftwf_complex;
#else
typedef double fftw_complex[2];
typedef float fftwf_complex[2];
#endif
|
|
|
|
// Opaque plan handles for the double-precision (fftw_) and single-precision
// (fftwf_) APIs. Both are plain `void *`, so the two handle types are
// interchangeable as far as the compiler is concerned.
typedef void *fftw_plan;
typedef void *fftwf_plan;
|
|
|
|
// Dimension descriptor taken by the fftw_plan_guru_* functions.
// Field meanings presumably follow FFTW's guru interface (n = extent,
// is = input stride, os = output stride) -- confirm against the cuFFTW docs.
typedef struct {
    int n;
    int is;
    int os;
} fftw_iodim;

// The single-precision API uses the same descriptor layout.
typedef fftw_iodim fftwf_iodim;
|
|
|
|
// Dimension descriptor taken by the fftw_plan_guru64_* functions; same shape
// as fftw_iodim but with ptrdiff_t fields (ptrdiff_t requires <stddef.h>).
// Field meanings presumably follow FFTW's guru64 interface (n = extent,
// is = input stride, os = output stride) -- confirm against the cuFFTW docs.
typedef struct {
    ptrdiff_t n;
    ptrdiff_t is;
    ptrdiff_t os;
} fftw_iodim64;

// The single-precision API uses the same descriptor layout.
typedef fftw_iodim64 fftwf_iodim64;
|
|
|
|
|
|
// CUFFTW defines and supports the following double precision APIs
|
|
|
|
|
|
fftw_plan CUFFTAPI fftw_plan_dft_1d(int n,
|
|
fftw_complex *in,
|
|
fftw_complex *out,
|
|
int sign,
|
|
unsigned flags);
|
|
|
|
fftw_plan CUFFTAPI fftw_plan_dft_2d(int n0,
|
|
int n1,
|
|
fftw_complex *in,
|
|
fftw_complex *out,
|
|
int sign,
|
|
unsigned flags);
|
|
|
|
fftw_plan CUFFTAPI fftw_plan_dft_3d(int n0,
|
|
int n1,
|
|
int n2,
|
|
fftw_complex *in,
|
|
fftw_complex *out,
|
|
int sign,
|
|
unsigned flags);
|
|
|
|
fftw_plan CUFFTAPI fftw_plan_dft(int rank,
|
|
const int *n,
|
|
fftw_complex *in,
|
|
fftw_complex *out,
|
|
int sign,
|
|
unsigned flags);
|
|
|
|
fftw_plan CUFFTAPI fftw_plan_dft_r2c_1d(int n,
|
|
double *in,
|
|
fftw_complex *out,
|
|
unsigned flags);
|
|
|
|
fftw_plan CUFFTAPI fftw_plan_dft_r2c_2d(int n0,
|
|
int n1,
|
|
double *in,
|
|
fftw_complex *out,
|
|
unsigned flags);
|
|
|
|
fftw_plan CUFFTAPI fftw_plan_dft_r2c_3d(int n0,
|
|
int n1,
|
|
int n2,
|
|
double *in,
|
|
fftw_complex *out,
|
|
unsigned flags);
|
|
|
|
fftw_plan CUFFTAPI fftw_plan_dft_r2c(int rank,
|
|
const int *n,
|
|
double *in,
|
|
fftw_complex *out,
|
|
unsigned flags);
|
|
|
|
fftw_plan CUFFTAPI fftw_plan_dft_c2r_1d(int n,
|
|
fftw_complex *in,
|
|
double *out,
|
|
unsigned flags);
|
|
|
|
fftw_plan CUFFTAPI fftw_plan_dft_c2r_2d(int n0,
|
|
int n1,
|
|
fftw_complex *in,
|
|
double *out,
|
|
unsigned flags);
|
|
|
|
fftw_plan CUFFTAPI fftw_plan_dft_c2r_3d(int n0,
|
|
int n1,
|
|
int n2,
|
|
fftw_complex *in,
|
|
double *out,
|
|
unsigned flags);
|
|
|
|
fftw_plan CUFFTAPI fftw_plan_dft_c2r(int rank,
|
|
const int *n,
|
|
fftw_complex *in,
|
|
double *out,
|
|
unsigned flags);
|
|
|
|
|
|
fftw_plan CUFFTAPI fftw_plan_many_dft(int rank,
|
|
const int *n,
|
|
int batch,
|
|
fftw_complex *in,
|
|
const int *inembed, int istride, int idist,
|
|
fftw_complex *out,
|
|
const int *onembed, int ostride, int odist,
|
|
int sign, unsigned flags);
|
|
|
|
fftw_plan CUFFTAPI fftw_plan_many_dft_r2c(int rank,
|
|
const int *n,
|
|
int batch,
|
|
double *in,
|
|
const int *inembed, int istride, int idist,
|
|
fftw_complex *out,
|
|
const int *onembed, int ostride, int odist,
|
|
unsigned flags);
|
|
|
|
fftw_plan CUFFTAPI fftw_plan_many_dft_c2r(int rank,
|
|
const int *n,
|
|
int batch,
|
|
fftw_complex *in,
|
|
const int *inembed, int istride, int idist,
|
|
double *out,
|
|
const int *onembed, int ostride, int odist,
|
|
unsigned flags);
|
|
|
|
fftw_plan CUFFTAPI fftw_plan_guru_dft(int rank, const fftw_iodim *dims,
|
|
int batch_rank, const fftw_iodim *batch_dims,
|
|
fftw_complex *in, fftw_complex *out,
|
|
int sign, unsigned flags);
|
|
|
|
fftw_plan CUFFTAPI fftw_plan_guru_dft_r2c(int rank, const fftw_iodim *dims,
|
|
int batch_rank, const fftw_iodim *batch_dims,
|
|
double *in, fftw_complex *out,
|
|
unsigned flags);
|
|
|
|
fftw_plan CUFFTAPI fftw_plan_guru_dft_c2r(int rank, const fftw_iodim *dims,
|
|
int batch_rank, const fftw_iodim *batch_dims,
|
|
fftw_complex *in, double *out,
|
|
unsigned flags);
|
|
|
|
void CUFFTAPI fftw_execute(const fftw_plan plan);
|
|
|
|
void CUFFTAPI fftw_execute_dft(const fftw_plan plan,
|
|
fftw_complex *idata,
|
|
fftw_complex *odata);
|
|
|
|
void CUFFTAPI fftw_execute_dft_r2c(const fftw_plan plan,
|
|
double *idata,
|
|
fftw_complex *odata);
|
|
|
|
void CUFFTAPI fftw_execute_dft_c2r(const fftw_plan plan,
|
|
fftw_complex *idata,
|
|
double *odata);
|
|
|
|
|
|
// CUFFTW defines and supports the following single precision APIs
|
|
|
|
fftwf_plan CUFFTAPI fftwf_plan_dft_1d(int n,
|
|
fftwf_complex *in,
|
|
fftwf_complex *out,
|
|
int sign,
|
|
unsigned flags);
|
|
|
|
fftwf_plan CUFFTAPI fftwf_plan_dft_2d(int n0,
|
|
int n1,
|
|
fftwf_complex *in,
|
|
fftwf_complex *out,
|
|
int sign,
|
|
unsigned flags);
|
|
|
|
fftwf_plan CUFFTAPI fftwf_plan_dft_3d(int n0,
|
|
int n1,
|
|
int n2,
|
|
fftwf_complex *in,
|
|
fftwf_complex *out,
|
|
int sign,
|
|
unsigned flags);
|
|
|
|
fftwf_plan CUFFTAPI fftwf_plan_dft(int rank,
|
|
const int *n,
|
|
fftwf_complex *in,
|
|
fftwf_complex *out,
|
|
int sign,
|
|
unsigned flags);
|
|
|
|
fftwf_plan CUFFTAPI fftwf_plan_dft_r2c_1d(int n,
|
|
float *in,
|
|
fftwf_complex *out,
|
|
unsigned flags);
|
|
|
|
fftwf_plan CUFFTAPI fftwf_plan_dft_r2c_2d(int n0,
|
|
int n1,
|
|
float *in,
|
|
fftwf_complex *out,
|
|
unsigned flags);
|
|
|
|
fftwf_plan CUFFTAPI fftwf_plan_dft_r2c_3d(int n0,
|
|
int n1,
|
|
int n2,
|
|
float *in,
|
|
fftwf_complex *out,
|
|
unsigned flags);
|
|
|
|
fftwf_plan CUFFTAPI fftwf_plan_dft_r2c(int rank,
|
|
const int *n,
|
|
float *in,
|
|
fftwf_complex *out,
|
|
unsigned flags);
|
|
|
|
fftwf_plan CUFFTAPI fftwf_plan_dft_c2r_1d(int n,
|
|
fftwf_complex *in,
|
|
float *out,
|
|
unsigned flags);
|
|
|
|
fftwf_plan CUFFTAPI fftwf_plan_dft_c2r_2d(int n0,
|
|
int n1,
|
|
fftwf_complex *in,
|
|
float *out,
|
|
unsigned flags);
|
|
|
|
fftwf_plan CUFFTAPI fftwf_plan_dft_c2r_3d(int n0,
|
|
int n1,
|
|
int n2,
|
|
fftwf_complex *in,
|
|
float *out,
|
|
unsigned flags);
|
|
|
|
fftwf_plan CUFFTAPI fftwf_plan_dft_c2r(int rank,
|
|
const int *n,
|
|
fftwf_complex *in,
|
|
float *out,
|
|
unsigned flags);
|
|
|
|
fftwf_plan CUFFTAPI fftwf_plan_many_dft(int rank,
|
|
const int *n,
|
|
int batch,
|
|
fftwf_complex *in,
|
|
const int *inembed, int istride, int idist,
|
|
fftwf_complex *out,
|
|
const int *onembed, int ostride, int odist,
|
|
int sign, unsigned flags);
|
|
|
|
fftwf_plan CUFFTAPI fftwf_plan_many_dft_r2c(int rank,
|
|
const int *n,
|
|
int batch,
|
|
float *in,
|
|
const int *inembed, int istride, int idist,
|
|
fftwf_complex *out,
|
|
const int *onembed, int ostride, int odist,
|
|
unsigned flags);
|
|
|
|
fftwf_plan CUFFTAPI fftwf_plan_many_dft_c2r(int rank,
|
|
const int *n,
|
|
int batch,
|
|
fftwf_complex *in,
|
|
const int *inembed, int istride, int idist,
|
|
float *out,
|
|
const int *onembed, int ostride, int odist,
|
|
unsigned flags);
|
|
|
|
fftwf_plan CUFFTAPI fftwf_plan_guru_dft(int rank, const fftwf_iodim *dims,
|
|
int batch_rank, const fftwf_iodim *batch_dims,
|
|
fftwf_complex *in, fftwf_complex *out,
|
|
int sign, unsigned flags);
|
|
|
|
fftwf_plan CUFFTAPI fftwf_plan_guru_dft_r2c(int rank, const fftwf_iodim *dims,
|
|
int batch_rank, const fftwf_iodim *batch_dims,
|
|
float *in, fftwf_complex *out,
|
|
unsigned flags);
|
|
|
|
fftwf_plan CUFFTAPI fftwf_plan_guru_dft_c2r(int rank, const fftwf_iodim *dims,
|
|
int batch_rank, const fftwf_iodim *batch_dims,
|
|
fftwf_complex *in, float *out,
|
|
unsigned flags);
|
|
|
|
void CUFFTAPI fftwf_execute(const fftw_plan plan);
|
|
|
|
void CUFFTAPI fftwf_execute_dft(const fftwf_plan plan,
|
|
fftwf_complex *idata,
|
|
fftwf_complex *odata);
|
|
|
|
void CUFFTAPI fftwf_execute_dft_r2c(const fftwf_plan plan,
|
|
float *idata,
|
|
fftwf_complex *odata);
|
|
|
|
void CUFFTAPI fftwf_execute_dft_c2r(const fftwf_plan plan,
|
|
fftwf_complex *idata,
|
|
float *odata);
|
|
|
|
/// CUFFTW 64-bit Guru Interface
|
|
/// dp
|
|
fftw_plan CUFFTAPI fftw_plan_guru64_dft(int rank, const fftw_iodim64* dims, int batch_rank, const fftw_iodim64* batch_dims, fftw_complex* in, fftw_complex* out, int sign, unsigned flags);
|
|
|
|
fftw_plan CUFFTAPI fftw_plan_guru64_dft_r2c(int rank, const fftw_iodim64* dims, int batch_rank, const fftw_iodim64* batch_dims, double* in, fftw_complex* out, unsigned flags);
|
|
|
|
fftw_plan CUFFTAPI fftw_plan_guru64_dft_c2r(int rank, const fftw_iodim64* dims, int batch_rank, const fftw_iodim64* batch_dims, fftw_complex* in, double* out, unsigned flags);
|
|
|
|
/// sp
|
|
fftwf_plan CUFFTAPI fftwf_plan_guru64_dft(int rank, const fftwf_iodim64* dims, int batch_rank, const fftwf_iodim64* batch_dims, fftwf_complex* in, fftwf_complex* out, int sign, unsigned flags);
|
|
|
|
fftwf_plan CUFFTAPI fftwf_plan_guru64_dft_r2c(int rank, const fftwf_iodim64* dims, int batch_rank, const fftwf_iodim64* batch_dims, float* in, fftwf_complex* out, unsigned flags);
|
|
|
|
fftwf_plan CUFFTAPI fftwf_plan_guru64_dft_c2r(int rank, const fftwf_iodim64* dims, int batch_rank, const fftwf_iodim64* batch_dims, fftwf_complex* in, float* out, unsigned flags);
|
|
|
|
// Declaration helper that combines a return type T with CUFFTAPI.
// On Windows CUFFTAPI is written after the return type, elsewhere before it.
// NOTE(review): presumably because CUFFTAPI is a calling-convention keyword on
// Windows and an attribute elsewhere -- confirm against cufft.h.
#ifdef _WIN32
#define _CUFFTAPI(T) T CUFFTAPI
#else
#define _CUFFTAPI(T) CUFFTAPI T
#endif
|
|
|
|
// CUFFTW defines and supports the following support APIs
|
|
_CUFFTAPI(void *) fftw_malloc(size_t n);
|
|
|
|
_CUFFTAPI(void *) fftwf_malloc(size_t n);
|
|
|
|
void CUFFTAPI fftw_free(void *pointer);
|
|
|
|
void CUFFTAPI fftwf_free(void *pointer);
|
|
|
|
void CUFFTAPI fftw_export_wisdom_to_file(FILE * output_file);
|
|
|
|
void CUFFTAPI fftwf_export_wisdom_to_file(FILE * output_file);
|
|
|
|
void CUFFTAPI fftw_import_wisdom_from_file(FILE * input_file);
|
|
|
|
void CUFFTAPI fftwf_import_wisdom_from_file(FILE * input_file);
|
|
|
|
void CUFFTAPI fftw_print_plan(const fftw_plan plan);
|
|
|
|
void CUFFTAPI fftwf_print_plan(const fftwf_plan plan);
|
|
|
|
void CUFFTAPI fftw_set_timelimit(double seconds);
|
|
|
|
void CUFFTAPI fftwf_set_timelimit(double seconds);
|
|
|
|
double CUFFTAPI fftw_cost(const fftw_plan plan);
|
|
|
|
double CUFFTAPI fftwf_cost(const fftw_plan plan);
|
|
|
|
void CUFFTAPI fftw_flops(const fftw_plan plan, double *add, double *mul, double *fma);
|
|
|
|
void CUFFTAPI fftwf_flops(const fftw_plan plan, double *add, double *mul, double *fma);
|
|
|
|
void CUFFTAPI fftw_destroy_plan(fftw_plan plan);
|
|
|
|
void CUFFTAPI fftwf_destroy_plan(fftwf_plan plan);
|
|
|
|
void CUFFTAPI fftw_cleanup(void);
|
|
|
|
void CUFFTAPI fftwf_cleanup(void);
|
|
|
|
#ifdef __cplusplus
|
|
}
|
|
#endif
|
|
|
|
#endif /* _CUFFTW_H_ */
|