1 #ifndef CUDAPMESOLVERUTIL_H     2 #define CUDAPMESOLVERUTIL_H    11 #include <hipfft/hipfft.h>    23 #if defined(NAMD_CUDA) || defined(NAMD_HIP)    24 void writeComplexToDisk(
const float2 *d_data, 
const int size, 
const char* filename, cudaStream_t stream);
    26 void writeRealToDisk(
const float *d_data, 
const int size, 
const char* filename, cudaStream_t stream);
    28 #if defined(NAMD_CUDA) || defined(NAMD_HIP)    29 #define cufftCheck(stmt) do {                                           \    30   cufftResult err = stmt;                                               \    31   if (err != CUFFT_SUCCESS) {                                           \    33           sprintf(msg, "%s in file %s, function %s\n", #stmt,__FILE__,__FUNCTION__); \    43 #if defined(NAMD_CUDA) || defined(NAMD_HIP)     44   cufftHandle forwardPlan, backwardPlan;
    45   cufftType_t forwardType, backwardType;
    52         float* allocateData(
const int dataSizeRequired);
    53         void plan3D(
int *n, 
int flags);
    54         void plan2D(
int *n, 
int howmany, 
int flags);
    55         void plan1DX(
int *n, 
int howmany, 
int flags);
    56         void plan1DY(
int *n, 
int howmany, 
int flags);
    57         void plan1DZ(
int *n, 
int howmany, 
int flags);
    78         float *d_bm1, *d_bm2, *d_bm3;
    84         EnergyVirial* d_energyVirial;
    85         EnergyVirial* h_energyVirial;
    86         cudaEvent_t copyEnergyVirialEvent;
    90         static void energyAndVirialCheck(
void *arg, 
double walltime);
    96                 int deviceID, cudaStream_t stream, 
unsigned int iGrid = 0);
    98         void solve(
Lattice &lattice, 
const bool doEnergy, 
const bool doVirial, 
float* data);
   115         bool gridTexObjActive;
   116         cudaTextureObject_t gridTexObj;
   125         void setupGridData(
float* 
data, 
int data_len);
   127         size_t d_atomsCapacity;
   133         size_t d_forceCapacity;
   138   cudaEvent_t gatherForceEvent;
   143   static void cuda_gatherforce_check(
void *arg, 
double walltime);
   146                 int deviceID, cudaStream_t stream);
   163 #ifndef P2P_ENABLE_3D   170         std::vector<float2*> dataPtrsYZX;
   171         std::vector<float2*> dataPtrsZXY;
   179         void copyDataToPeerDevice(
const int iblock,
   180                 const int iblock_out, 
const int jblock_out, 
const int kblock_out,
   181                 int deviceID_out, 
int permutation_out, float2* data_out);
   184                 const int jblock, 
const int kblock, 
int deviceID, cudaStream_t stream);
   186         void setDataPtrsYZX(std::vector<float2*>& dataPtrsNew, float2* data);
   187         void setDataPtrsZXY(std::vector<float2*>& dataPtrsNew, float2* data);
   194 #ifndef P2P_ENABLE_3D   223 #ifndef USE_TABLE_ARRAYS   236 #if defined(NAMD_CUDA) || defined(NAMD_HIP) //to enable when hipfft full support is ready   338       double3* patchMin, double3* patchMax, double3* awayDists);
   341   void calcSelfEnergyAlch(
int step);
   342   void scaleAndComputeFEPEnergyVirials(
const EnergyVirial* energyVirials, 
int step, 
double& energy, 
double& energy_F, 
double (&virial)[9]);
   343   void scaleAndComputeTIEnergyVirials(
const EnergyVirial* energyVirials, 
int step, 
double& energy, 
double& energy_TI_1, 
double& energy_TI_2, 
double (&virial)[9]);
   344   void scaleAndMergeForce(
int step);
   350     return reductionGpuResident;
   359 #endif // CUDAPMESOLVERUTIL_H void finishReduction(bool doEnergyVirial)
 
CudaPmeOneDevice(PmeGrid pmeGrid_, int deviceID_, int deviceIndex_)
 
cufftHandle * backwardPlans
 
void energyAndVirialSetCallback(CudaPmePencilXYZ *pencilPtr)
 
bool force_scaling_alch_first_time
 
CudaPmeTranspose(PmeGrid pmeGrid, const int permutation, const int jblock, const int kblock, int deviceID, cudaStream_t stream)
 
double * d_selfEnergy_FEP
 
~CudaPmeRealSpaceCompute()
 
EnergyVirial * d_energyVirials
 
void checkPatchLevelLatticeCompatibilityAndComputeOffsets(const Lattice &lattice, const int numPatches, const CudaLocalRecord *localRecords, double3 *patchMin, double3 *patchMax, double3 *awayDists)
 
void checkPatchLevelSimParamCompatibility(const int order, const bool periodicY, const bool periodicZ)
 
void spreadCharge(Lattice &lattice)
 
void copyAtoms(const int numAtoms, const CudaAtom *atoms)
 
PatchLevelPmeData patchLevelPmeData
 
CudaPmeRealSpaceCompute(PmeGrid pmeGrid, const int jblock, const int kblock, int deviceID, cudaStream_t stream)
 
void copyDataToPeerDeviceZXY(const int iblock, int deviceID_out, int permutation_out, float2 *data_out)
 
void copyDataDeviceToDevice(const int iblock, float2 *data_out)
 
CudaFFTCompute(int deviceID, cudaStream_t stream)
 
int computeSharedMemoryPatchLevelSpreadCharge(const int numThreads, const int3 patchGridDim, const int order)
 
void copyDataDeviceToHost(const int iblock, float2 *h_data, const int h_dataSize)
 
bool self_energy_alch_first_time
 
void getVirial(double *virial)
 
cudaTextureObject_t * gridTexObjArrays
 
double * d_selfEnergy_TI_2
 
void writeHostComplexToDisk(const float2 *h_data, const int size, const char *filename)
 
void gatherForce(Lattice &lattice, CudaForce *force)
 
void writeComplexToDisk(const float2 *d_data, const int size, const char *filename, cudaStream_t stream)
 
void waitStreamSynchronize()
 
void solve(Lattice &lattice, const bool doEnergy, const bool doVirial, float *data)
 
void gatherForceSetCallback(ComputePmeCUDADevice *devicePtr_in)
 
void setDataPtrsYZX(std::vector< float2 *> &dataPtrsNew, float2 *data)
 
void transposeXYZtoYZX(const float2 *data)
 
void setDataPtrsZXY(std::vector< float2 *> &dataPtrsNew, float2 *data)
 
CudaPmeKSpaceCompute(PmeGrid pmeGrid, const int permutation, const int jblock, const int kblock, double kappa, int deviceID, cudaStream_t stream, unsigned int iGrid=0)
 
void writeRealToDisk(const float *d_data, const int size, const char *filename, cudaStream_t stream)
 
cufftHandle * forwardPlans
 
void copyDataHostToDevice(const int iblock, float2 *data_in, float2 *data_out)
 
void transposeXYZtoZXY(const float2 *data)
 
void waitGatherForceDone()
 
void copyDataToPeerDeviceYZX(const int iblock, int deviceID_out, int permutation_out, float2 *data_out)
 
void waitEnergyAndVirial()
 
int computeSharedMemoryPatchLevelGatherForce(const int numThreads, const int3 patchGridDim, const int order)
 
float2 * getBuffer(const int iblock)
 
int getShiftedGrid(const double x, const int grid)
 
double * d_selfEnergy_TI_1
 
EnergyVirial * h_energyVirials
 
float * d_scaling_factors
 
void checkPatchLevelDeviceCompatibility()
 
void compute(const Lattice &lattice, int doEnergyVirial, int step)