You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
258 lines
11 KiB
C
258 lines
11 KiB
C
|
|
/* Copyright 2005-2014 NVIDIA Corporation. All rights reserved.
|
|
*
|
|
* NOTICE TO LICENSEE:
|
|
*
|
|
* The source code and/or documentation ("Licensed Deliverables") are
|
|
* subject to NVIDIA intellectual property rights under U.S. and
|
|
* international Copyright laws.
|
|
*
|
|
* The Licensed Deliverables contained herein are PROPRIETARY and
|
|
* CONFIDENTIAL to NVIDIA and are being provided under the terms and
|
|
* conditions of a form of NVIDIA software license agreement by and
|
|
* between NVIDIA and Licensee ("License Agreement") or electronically
|
|
* accepted by Licensee. Notwithstanding any terms or conditions to
|
|
* the contrary in the License Agreement, reproduction or disclosure
|
|
* of the Licensed Deliverables to any third party without the express
|
|
* written consent of NVIDIA is prohibited.
|
|
*
|
|
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
|
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
|
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. THEY ARE
|
|
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
|
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
|
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
|
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
|
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
|
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
|
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
|
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
|
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
|
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
|
* OF THESE LICENSED DELIVERABLES.
|
|
*
|
|
* U.S. Government End Users. These Licensed Deliverables are a
|
|
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
|
* 1995), consisting of "commercial computer software" and "commercial
|
|
* computer software documentation" as such terms are used in 48
|
|
* C.F.R. 12.212 (SEPT 1995) and are provided to the U.S. Government
|
|
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
|
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
|
* U.S. Government End Users acquire the Licensed Deliverables with
|
|
* only those rights set forth herein.
|
|
*
|
|
* Any use of the Licensed Deliverables in individual and commercial
|
|
* software must include, in the user documentation and internal
|
|
* comments to the code, the above Disclaimer and U.S. Government End
|
|
* Users Notice.
|
|
*/
|
|
|
|
/*!
|
|
* \file cufftXt.h
|
|
* \brief Public header file for the NVIDIA CUDA FFT library (CUFFT)
|
|
*/
|
|
|
|
#ifndef _CUFFTXT_H_
|
|
#define _CUFFTXT_H_
|
|
#include "cudalibxt.h"
|
|
#include "cufft.h"
|
|
|
|
|
|
/*
 * CUFFTAPI decorates every entry point declared in this header.
 * A definition supplied before this header is included is left untouched;
 * otherwise the macro becomes __stdcall when _WIN32 is defined and expands
 * to nothing everywhere else.
 */
#if !defined(CUFFTAPI)
#  if defined(_WIN32)
#    define CUFFTAPI __stdcall
#  else
#    define CUFFTAPI
#  endif
#endif
|
|
|
|
#ifdef __cplusplus
|
|
extern "C" {
|
|
#endif
|
|
|
|
/*
 * cufftXtSubFormat identifies the data layout of a memory descriptor
 * owned by cufft.
 * Note: multi GPU cufft does not yet support out-of-place transforms.
 */
typedef enum cufftXtSubFormat_t {
    CUFFT_XT_FORMAT_INPUT             = 0, /* by default input is in linear order across GPUs */
    CUFFT_XT_FORMAT_OUTPUT            = 1, /* by default output is in scrambled order depending on transform */
    CUFFT_XT_FORMAT_INPLACE           = 2, /* by default inplace is input order, which is linear across GPUs */
    CUFFT_XT_FORMAT_INPLACE_SHUFFLED  = 3, /* shuffled output order after execution of the transform */
    CUFFT_XT_FORMAT_1D_INPUT_SHUFFLED = 4, /* shuffled input order prior to execution of 1D transforms */
    CUFFT_FORMAT_UNDEFINED            = 5  /* last enumerator; the only one without the CUFFT_XT_ prefix */
} cufftXtSubFormat;
|
|
|
|
/*
 * cufftXtCopyType specifies the type of copy for cufftXtMemcpy.
 */
typedef enum cufftXtCopyType_t {
    CUFFT_COPY_HOST_TO_DEVICE   = 0,
    CUFFT_COPY_DEVICE_TO_HOST   = 1,
    CUFFT_COPY_DEVICE_TO_DEVICE = 2,
    CUFFT_COPY_UNDEFINED        = 3
} cufftXtCopyType;
|
|
|
|
/*
 * cufftXtQueryType specifies the type of query for cufftXtQueryPlan.
 */
typedef enum cufftXtQueryType_t {
    CUFFT_QUERY_1D_FACTORS = 0,
    CUFFT_QUERY_UNDEFINED  = 1
} cufftXtQueryType;
|
|
|
|
/*
 * cufftXt1dFactors: ten long long int members followed by four int members.
 * Member order and types are part of the binary layout and are kept exactly
 * as declared; only the declarations are grouped by category.
 * NOTE(review): presumably the structure handed to cufftXtQueryPlan through
 * its queryStruct pointer for CUFFT_QUERY_1D_FACTORS - confirm against the
 * cuFFT documentation.
 */
typedef struct cufftXt1dFactors_t {
    /* overall size */
    long long int size;
    /* string / substring counts and lengths */
    long long int stringCount, stringLength, substringLength;
    /* the two factors */
    long long int factor1, factor2;
    /* masks, one per string/substring/factor quantity */
    long long int stringMask, substringMask, factor1Mask, factor2Mask;
    /* shifts, one per string/substring/factor quantity */
    int stringShift, substringShift, factor1Shift, factor2Shift;
} cufftXt1dFactors;
|
|
|
|
/*
 * cufftXtWorkAreaPolicy specifies policy for cufftXtSetWorkAreaPolicy.
 *
 * The last enumerator deliberately has no trailing comma: a trailing comma
 * in an enumerator list is rejected by C90 and C++03 compilers in pedantic
 * mode, and no other enum in this header uses one.
 */
typedef enum cufftXtWorkAreaPolicy_t {
    CUFFT_WORKAREA_MINIMAL     = 0, /* maximum reduction */
    CUFFT_WORKAREA_USER        = 1, /* use workSize parameter as limit */
    CUFFT_WORKAREA_PERFORMANCE = 2  /* default - 1x overhead or more, maximum performance */
} cufftXtWorkAreaPolicy;
|
|
|
|
// multi-GPU routines
|
|
cufftResult CUFFTAPI cufftXtSetGPUs(cufftHandle handle, int nGPUs, int *whichGPUs);
|
|
|
|
/*
 * cufftXtMalloc
 *   plan        plan handle
 *   descriptor  address of a cudaLibXtDesc pointer (double indirection, so
 *               the callee is able to store a pointer through it)
 *   format      cufftXtSubFormat layout selector
 * NOTE(review): presumably allocates a descriptor for the plan and returns
 * it through *descriptor, to be released with cufftXtFree - confirm.
 */
cufftResult CUFFTAPI cufftXtMalloc(cufftHandle plan, cudaLibXtDesc **descriptor, cufftXtSubFormat format);
|
|
|
|
/*
 * cufftXtMemcpy
 *   plan        plan handle
 *   dstPointer  untyped destination pointer
 *   srcPointer  untyped source pointer (declared non-const)
 *   type        cufftXtCopyType selecting the kind of copy
 * NOTE(review): which of dstPointer/srcPointer is a host buffer and which is
 * a cudaLibXtDesc presumably depends on `type` - confirm against the cuFFT
 * documentation.
 */
cufftResult CUFFTAPI cufftXtMemcpy(cufftHandle plan, void *dstPointer, void *srcPointer, cufftXtCopyType type);
|
|
|
|
/*
 * cufftXtFree
 *   descriptor  cudaLibXtDesc pointer
 * NOTE(review): presumably the counterpart of cufftXtMalloc - confirm.
 */
cufftResult CUFFTAPI cufftXtFree(
    cudaLibXtDesc *descriptor);
|
|
|
|
/*
 * cufftXtSetWorkArea
 *   plan      plan handle
 *   workArea  pointer to void pointer(s)
 * NOTE(review): presumably an array with one work-area pointer per GPU of
 * the plan - confirm against the cuFFT documentation.
 */
cufftResult CUFFTAPI cufftXtSetWorkArea(
    cufftHandle plan,
    void **workArea);
|
|
|
|
/*
 * cufftXtExecDescriptorC2C - descriptor-based execution entry point.
 *   plan       plan handle
 *   input      source descriptor
 *   output     destination descriptor
 *   direction  int selector
 * NOTE(review): presumably a single-precision complex-to-complex transform
 * with direction being CUFFT_FORWARD or CUFFT_INVERSE from cufft.h - confirm.
 */
cufftResult CUFFTAPI cufftXtExecDescriptorC2C(cufftHandle plan, cudaLibXtDesc *input, cudaLibXtDesc *output, int direction);
|
|
|
|
/*
 * cufftXtExecDescriptorR2C - descriptor-based execution entry point.
 *   plan    plan handle
 *   input   source descriptor
 *   output  destination descriptor
 * Takes no direction argument.
 * NOTE(review): presumably single-precision real-to-complex - confirm.
 */
cufftResult CUFFTAPI cufftXtExecDescriptorR2C(cufftHandle plan, cudaLibXtDesc *input, cudaLibXtDesc *output);
|
|
|
|
/*
 * cufftXtExecDescriptorC2R - descriptor-based execution entry point.
 *   plan    plan handle
 *   input   source descriptor
 *   output  destination descriptor
 * Takes no direction argument.
 * NOTE(review): presumably single-precision complex-to-real - confirm.
 */
cufftResult CUFFTAPI cufftXtExecDescriptorC2R(cufftHandle plan, cudaLibXtDesc *input, cudaLibXtDesc *output);
|
|
|
|
/*
 * cufftXtExecDescriptorZ2Z - descriptor-based execution entry point.
 *   plan       plan handle
 *   input      source descriptor
 *   output     destination descriptor
 *   direction  int selector
 * NOTE(review): presumably a double-precision complex-to-complex transform
 * with direction being CUFFT_FORWARD or CUFFT_INVERSE from cufft.h - confirm.
 */
cufftResult CUFFTAPI cufftXtExecDescriptorZ2Z(cufftHandle plan, cudaLibXtDesc *input, cudaLibXtDesc *output, int direction);
|
|
|
|
/*
 * cufftXtExecDescriptorD2Z - descriptor-based execution entry point.
 *   plan    plan handle
 *   input   source descriptor
 *   output  destination descriptor
 * Takes no direction argument.
 * NOTE(review): presumably double-precision real-to-complex - confirm.
 */
cufftResult CUFFTAPI cufftXtExecDescriptorD2Z(cufftHandle plan, cudaLibXtDesc *input, cudaLibXtDesc *output);
|
|
|
|
/*
 * cufftXtExecDescriptorZ2D - descriptor-based execution entry point.
 *   plan    plan handle
 *   input   source descriptor
 *   output  destination descriptor
 * Takes no direction argument.
 * NOTE(review): presumably double-precision complex-to-real - confirm.
 */
cufftResult CUFFTAPI cufftXtExecDescriptorZ2D(cufftHandle plan, cudaLibXtDesc *input, cudaLibXtDesc *output);
|
|
|
|
// Utility functions
|
|
|
|
cufftResult CUFFTAPI cufftXtQueryPlan(cufftHandle plan, void *queryStruct, cufftXtQueryType queryType);
|
|
|
|
|
|
/* callbacks */

/*
 * cufftXtCallbackType names the callback slot addressed by
 * cufftXtSetCallback, cufftXtClearCallback and cufftXtSetCallbackSharedSize
 * (each of them takes a cbType argument of this type).
 * NOTE(review): LD / ST presumably stand for load / store, matching the
 * cufftCallbackLoad* and cufftCallbackStore* pointer typedefs - confirm.
 */
typedef enum cufftXtCallbackType_t {
    CUFFT_CB_LD_COMPLEX        = 0,
    CUFFT_CB_LD_COMPLEX_DOUBLE = 1,
    CUFFT_CB_LD_REAL           = 2,
    CUFFT_CB_LD_REAL_DOUBLE    = 3,
    CUFFT_CB_ST_COMPLEX        = 4,
    CUFFT_CB_ST_COMPLEX_DOUBLE = 5,
    CUFFT_CB_ST_REAL           = 6,
    CUFFT_CB_ST_REAL_DOUBLE    = 7,
    CUFFT_CB_UNDEFINED         = 8
} cufftXtCallbackType;
|
|
|
|
/*
 * Load-callback function pointer types. All four share one parameter list,
 * (dataIn, offset, callerInfo, sharedPointer), and differ only in the element
 * type they return: cufftComplex (C), cufftDoubleComplex (Z), cufftReal (R)
 * and cufftDoubleReal (D).
 * NOTE(review): presumably `offset` is counted in elements rather than
 * bytes - confirm against the cuFFT callback documentation.
 */
typedef cufftComplex       (*cufftCallbackLoadC)(void *dataIn,
                                                 size_t offset,
                                                 void *callerInfo,
                                                 void *sharedPointer);
typedef cufftDoubleComplex (*cufftCallbackLoadZ)(void *dataIn,
                                                 size_t offset,
                                                 void *callerInfo,
                                                 void *sharedPointer);
typedef cufftReal          (*cufftCallbackLoadR)(void *dataIn,
                                                 size_t offset,
                                                 void *callerInfo,
                                                 void *sharedPointer);
typedef cufftDoubleReal    (*cufftCallbackLoadD)(void *dataIn,
                                                 size_t offset,
                                                 void *callerInfo,
                                                 void *sharedPointer);
|
|
|
|
typedef void (*cufftCallbackStoreC)(void *dataOut, size_t offset, cufftComplex element, void *callerInfo, void *sharedPointer);
|
|
typedef void (*cufftCallbackStoreZ)(void *dataOut, size_t offset, cufftDoubleComplex element, void *callerInfo, void *sharedPointer);
|
|
typedef void (*cufftCallbackStoreR)(void *dataOut, size_t offset, cufftReal element, void *callerInfo, void *sharedPointer);
|
|
typedef void (*cufftCallbackStoreD)(void *dataOut, size_t offset, cufftDoubleReal element, void *callerInfo, void *sharedPointer);
|
|
|
|
|
|
cufftResult CUFFTAPI cufftXtSetCallback(cufftHandle plan, void **callback_routine, cufftXtCallbackType cbType, void **caller_info);
|
|
cufftResult CUFFTAPI cufftXtClearCallback(cufftHandle plan, cufftXtCallbackType cbType);
|
|
cufftResult CUFFTAPI cufftXtSetCallbackSharedSize(cufftHandle plan, cufftXtCallbackType cbType, size_t sharedSize);
|
|
|
|
/*
 * cufftXtMakePlanMany - plan-construction entry point whose sizes, strides
 * and distances are long long int and whose input, output and execution
 * element types are selected through cudaDataType arguments.
 *
 * NOTE(review): the parameter roles below are inferred from the names only
 * (they resemble the advanced data layout arguments used elsewhere in
 * cuFFT) - confirm against the cuFFT documentation.
 *   rank                      presumably the dimensionality of the transform
 *   n                         presumably `rank` transform sizes
 *   inembed, istride, idist   presumably the input storage layout
 *   onembed, ostride, odist   presumably the output storage layout
 *   batch                     presumably the number of transforms
 *   workSize                  size_t pointer, presumably an output
 */
cufftResult CUFFTAPI cufftXtMakePlanMany(
    cufftHandle    plan,
    int            rank,
    long long int *n,
    long long int *inembed,
    long long int  istride,
    long long int  idist,
    cudaDataType   inputtype,
    long long int *onembed,
    long long int  ostride,
    long long int  odist,
    cudaDataType   outputtype,
    long long int  batch,
    size_t        *workSize,
    cudaDataType   executiontype);
|
|
|
|
/*
 * cufftXtGetSizeMany - takes exactly the same parameter list as
 * cufftXtMakePlanMany (same names, types and order).
 *
 * NOTE(review): presumably reports, through workSize, the work area a plan
 * built with these arguments would need, without building the plan - confirm
 * against the cuFFT documentation.
 */
cufftResult CUFFTAPI cufftXtGetSizeMany(
    cufftHandle    plan,
    int            rank,
    long long int *n,
    long long int *inembed,
    long long int  istride,
    long long int  idist,
    cudaDataType   inputtype,
    long long int *onembed,
    long long int  ostride,
    long long int  odist,
    cudaDataType   outputtype,
    long long int  batch,
    size_t        *workSize,
    cudaDataType   executiontype);
|
|
|
|
|
|
/*
 * cufftXtExec - execution entry point working on untyped pointers.
 *   plan       plan handle
 *   input      untyped source pointer
 *   output     untyped destination pointer
 *   direction  int selector
 * NOTE(review): the element types are presumably those fixed when the plan
 * was made (see cufftXtMakePlanMany), and direction presumably is
 * CUFFT_FORWARD or CUFFT_INVERSE from cufft.h - confirm.
 */
cufftResult CUFFTAPI cufftXtExec(cufftHandle plan, void *input, void *output, int direction);
|
|
|
|
/*
 * cufftXtExecDescriptor - execution entry point working on descriptors; its
 * name carries no element-type suffix, unlike the C2C/R2C/C2R/Z2Z/D2Z/Z2D
 * variants declared in this header.
 *   plan       plan handle
 *   input      source descriptor
 *   output     destination descriptor
 *   direction  int selector
 * NOTE(review): direction presumably is CUFFT_FORWARD or CUFFT_INVERSE from
 * cufft.h - confirm.
 */
cufftResult CUFFTAPI cufftXtExecDescriptor(cufftHandle plan, cudaLibXtDesc *input, cudaLibXtDesc *output, int direction);
|
|
|
|
/*
 * cufftXtSetWorkAreaPolicy
 *   plan      plan handle
 *   policy    cufftXtWorkAreaPolicy value
 *   workSize  size_t pointer; per the enum's own comment it is used as the
 *             limit when policy is CUFFT_WORKAREA_USER
 * NOTE(review): whether workSize may be NULL for the other policies is not
 * visible here - confirm against the cuFFT documentation.
 */
cufftResult CUFFTAPI cufftXtSetWorkAreaPolicy(cufftHandle plan,
                                              cufftXtWorkAreaPolicy policy,
                                              size_t *workSize);
|
|
|
|
#ifdef __cplusplus
|
|
}
|
|
#endif
|
|
|
|
#endif
|