NAMD
CudaPmeOneDevice Class Reference

#include <CudaPmeSolverUtil.h>

Classes

struct  EnergyVirial
 

Public Member Functions

 CudaPmeOneDevice (PmeGrid pmeGrid_, int deviceID_, int deviceIndex_)
 
 ~CudaPmeOneDevice ()
 
void compute (const Lattice &lattice, int doEnergyVirial, int step)
 
void finishReduction (bool doEnergyVirial)
 
int getShiftedGrid (const double x, const int grid)
 
int computeSharedMemoryPatchLevelSpreadCharge (const int numThreads, const int3 patchGridDim, const int order)
 
int computeSharedMemoryPatchLevelGatherForce (const int numThreads, const int3 patchGridDim, const int order)
 
void checkPatchLevelSimParamCompatibility (const int order, const bool periodicY, const bool periodicZ)
 
void checkPatchLevelDeviceCompatibility ()
 
void checkPatchLevelLatticeCompatibilityAndComputeOffsets (const Lattice &lattice, const int numPatches, const CudaLocalRecord *localRecords, double3 *patchMin, double3 *patchMax, double3 *awayDists)
 

Public Attributes

PmeGrid pmeGrid
 
int deviceID
 
int deviceIndex
 
cudaStream_t stream
 
int natoms
 
size_t num_used_grids
 
float4 * d_atoms
 
int * d_partition
 
float3 * d_forces
 
float * d_scaling_factors
 
cudaTextureObject_t * gridTexObjArrays
 
float * d_grids
 
float2 * d_trans
 
size_t gridsize
 
size_t transize
 
cufftHandle * forwardPlans
 
cufftHandle * backwardPlans
 
float * d_bm1
 
float * d_bm2
 
float * d_bm3
 
double kappa
 
EnergyVirial * d_energyVirials
 
EnergyVirial * h_energyVirials
 
bool self_energy_alch_first_time
 
bool force_scaling_alch_first_time
 
double * d_selfEnergy
 
double * d_selfEnergy_FEP
 
double * d_selfEnergy_TI_1
 
double * d_selfEnergy_TI_2
 
double selfEnergy
 
double selfEnergy_FEP
 
double selfEnergy_TI_1
 
double selfEnergy_TI_2
 
int m_step
 
PatchLevelPmeData patchLevelPmeData
 
Lattice currentLattice
 

Detailed Description

PME for the single-GPU case, where data persists on the GPU. Calls the real-space, FFT, and k-space parts. Receives atom and charge data as float4 * allocated on the device. Returns force data as float3 * allocated on the device. Returns energy and virial allocated on the device.

Definition at line 209 of file CudaPmeSolverUtil.h.
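
A minimal usage sketch of the interface (hypothetical driver code; in NAMD the object is constructed and driven by the GPU-resident integration path, and atom data are staged into d_atoms through PatchData rather than by the caller). Names such as grid, lattice, firstStep, lastStep, and energyFrequency are placeholders:

 // Sketch only: per-step driving of the one-device PME object.
 PmeGrid grid;                                   // K1, K2, K3, order set up elsewhere
 CudaPmeOneDevice pme(grid, /*deviceID=*/0, /*deviceIndex=*/0);

 for (int step = firstStep; step <= lastStep; ++step) {
   const int doEnergyVirial = (step % energyFrequency == 0);
   pme.compute(lattice, doEnergyVirial, step);   // spread, FFTs, k-space, gather
   pme.finishReduction(doEnergyVirial);          // submit energy/virial reductions
 }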

Constructor & Destructor Documentation

◆ CudaPmeOneDevice()

CudaPmeOneDevice::CudaPmeOneDevice ( PmeGrid  pmeGrid_,
int  deviceID_,
int  deviceIndex_ 
)

Definition at line 1303 of file CudaPmeSolverUtil.C.

References SimParameters::alchFepOn, SimParameters::alchGetNumOfPMEGrids(), SimParameters::alchOn, SimParameters::alchThermIntOn, backwardPlans, checkPatchLevelDeviceCompatibility(), checkPatchLevelSimParamCompatibility(), compute_b_moduli(), cudaCheck, SimParameters::CUDASOAintegrateMode, cufftCheck, d_atoms, d_bm1, d_bm2, d_bm3, d_energyVirials, d_forces, d_grids, d_partition, d_scaling_factors, d_selfEnergy, d_selfEnergy_FEP, d_selfEnergy_TI_1, d_selfEnergy_TI_2, d_trans, deviceID, deviceIndex, forwardPlans, gridsize, gridTexObjArrays, h_energyVirials, PmeGrid::K1, PmeGrid::K2, PmeGrid::K3, Node::molecule, NAMD_bug(), natoms, num_used_grids, Molecule::numAtoms, Node::Object(), ReductionMgr::Object(), PmeGrid::order, order, pmeGrid, REDUCTIONS_GPURESIDENT, Node::simParameters, stream, transize, and ReductionMgr::willSubmit().

1307  :
1308  pmeGrid(pmeGrid_), deviceID(deviceID_), deviceIndex(deviceIndex_),
1309  natoms(0), d_atoms(0), d_forces(0),
1310  d_grids(0), gridsize(0),
1311  d_trans(0), transize(0),
1312  d_bm1(0), d_bm2(0), d_bm3(0),
1317 {
1318 // fprintf(stderr, "CudaPmeOneDevice constructor START ******************************************\n");
1319  const SimParameters& sim_params = *(Node::Object()->simParameters);
1321  // Determine how many grids we need for the alchemical route
1322  if (sim_params.alchOn) {
1323  num_used_grids = sim_params.alchGetNumOfPMEGrids();
1324  } else {
1325  num_used_grids = 1;
1326  }
1327  cudaCheck(cudaSetDevice(deviceID));
1328 
1329  // Check to see if the simulation and device are compatible with patch-level kernels. The results
1330  // will be stored in the PatchLevelPmeData field
1331  checkPatchLevelSimParamCompatibility(pmeGrid.order, true /* periodic Y */, true /* periodic Z */);
1333 
1334  if (!sim_params.CUDASOAintegrateMode) {
1335  NAMD_bug("CudaPmeOneDevice requires GPU-resident mode");
1336  }
1337  reductionGpuResident = ReductionMgr::Object()->willSubmit(REDUCTIONS_GPURESIDENT);
1338 
1339  // create our own CUDA stream
1340 #if CUDA_VERSION >= 5050 || defined(NAMD_HIP)
1341  CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
1342  int leastPriority, greatestPriority;
1343  cudaCheck(cudaDeviceGetStreamPriorityRange(&leastPriority, &greatestPriority));
1344  cudaCheck(cudaStreamCreateWithPriority(&stream, cudaStreamDefault, greatestPriority));
1345 #else
1346  cudaCheck(cudaStreamCreate(&stream));
1347 #endif
1348 
1349  allocate_host<EnergyVirial>(&h_energyVirials, num_used_grids);
1350  allocate_device<EnergyVirial>(&d_energyVirials, num_used_grids);
1351  allocate_device<float>(&d_scaling_factors, num_used_grids);
1352  allocate_device<double>(&d_selfEnergy, 1);
1353  if (sim_params.alchFepOn) {
1354  allocate_device<double>(&d_selfEnergy_FEP, 1);
1355  } else {
1356  d_selfEnergy_FEP = NULL;
1357  }
1358  if (sim_params.alchThermIntOn) {
1359  allocate_device<double>(&d_selfEnergy_TI_1, 1);
1360  allocate_device<double>(&d_selfEnergy_TI_2, 1);
1361  } else {
1362  d_selfEnergy_TI_1 = NULL;
1363  d_selfEnergy_TI_2 = NULL;
1364  }
1365 
1366  // create device buffer space for atom positions and forces
1367  // to be accessed externally through PatchData
1368  allocate_device<float4>(&d_atoms, num_used_grids * natoms);
1369  allocate_device<float3>(&d_forces, num_used_grids * natoms);
1370  if (sim_params.alchOn) {
1371  allocate_device<int>(&d_partition, natoms);
1372  } else {
1373  d_partition = NULL;
1374  }
1375 #ifdef NODEGROUP_FORCE_REGISTER
1376  DeviceData& devData = cpdata.ckLocalBranch()->devData[deviceIndex];
1377  devData.s_datoms = (CudaAtom *) (d_atoms);
1378  devData.f_slow = (CudaForce *) (d_forces);
1379  devData.f_slow_size = natoms;
1380  devData.s_datoms_partition = d_partition;
1381 #endif
1382  int k1 = pmeGrid.K1;
1383  int k2 = pmeGrid.K2;
1384  int k3 = pmeGrid.K3;
1385  int order = pmeGrid.order;
1386  gridsize = k1 * k2 * k3;
1387  transize = (k1/2 + 1) * k2 * k3;
1388 
1389 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
1390 
1391  // set up cufft
1392  forwardPlans = new cufftHandle[num_used_grids];
1393  backwardPlans = new cufftHandle[num_used_grids];
1394  for (size_t iGrid = 0; iGrid < num_used_grids; ++iGrid) {
1395  cufftCheck(cufftPlan3d(&(forwardPlans[iGrid]), k3, k2, k1, CUFFT_R2C));
1396  cufftCheck(cufftPlan3d(&(backwardPlans[iGrid]), k3, k2, k1, CUFFT_C2R));
1397  cufftCheck(cufftSetStream(forwardPlans[iGrid], stream));
1398  cufftCheck(cufftSetStream(backwardPlans[iGrid], stream));
1399  }
1400 #endif
1401 
1402 #ifdef NAMD_CUDA
1403  cudaDeviceProp deviceProp;
1404  cudaCheck(cudaGetDeviceProperties(&deviceProp, deviceID));
1405  const int texture_alignment = int(deviceProp.textureAlignment);
1406  // d_grids and d_grids + N * gridsize will be used as device pointers for ::cudaResourceDesc::res::linear::devPtr
1407  // check if (d_grids + N * gridsize) is an address aligned to ::cudaDeviceProp::textureAlignment
1408  // which is required by cudaCreateTextureObject()
1409  // or maybe I should use cudaMallocPitch()?
1410  if ((gridsize % texture_alignment) != 0) {
1411  // if it is not aligned, padding is required
1412  gridsize = (int(gridsize / texture_alignment) + 1) * texture_alignment;
1413  }
1414  // Is it necessary to align transize too?
1415 // if ((transize % texture_alignment) != 0) {
1416 // // if it is not aligned, padding is required
1417 // transize = (int(transize / texture_alignment) + 1) * texture_alignment;
1418 // }
1419  allocate_device<float>(&d_grids, num_used_grids * gridsize);
1420  allocate_device<float2>(&d_trans, num_used_grids * transize);
1421  gridTexObjArrays = new cudaTextureObject_t[num_used_grids];
1422  for (size_t iGrid = 0; iGrid < num_used_grids; ++iGrid) {
1423  // set up texture object
1424  cudaResourceDesc resDesc;
1425  memset(&resDesc, 0, sizeof(resDesc));
1426  resDesc.resType = cudaResourceTypeLinear;
1427  resDesc.res.linear.devPtr = (void*)(d_grids + iGrid * (size_t)gridsize);
1428  resDesc.res.linear.desc.f = cudaChannelFormatKindFloat;
1429  resDesc.res.linear.desc.x = sizeof(float)*8;
1430  resDesc.res.linear.sizeInBytes = gridsize*sizeof(float);
1431  cudaTextureDesc texDesc;
1432  memset(&texDesc, 0, sizeof(texDesc));
1433  texDesc.readMode = cudaReadModeElementType;
1434  cudaCheck(cudaCreateTextureObject(&(gridTexObjArrays[iGrid]), &resDesc, &texDesc, NULL));
1435  }
1436 #else
1437  allocate_device<float>(&d_grids, num_used_grids * gridsize);
1438  allocate_device<float2>(&d_trans, num_used_grids * transize);
1439 #endif
1440  // calculate prefactors
1441  double *bm1 = new double[k1];
1442  double *bm2 = new double[k2];
1443  double *bm3 = new double[k3];
1444  // Use compute_b_moduli from PmeKSpace.C
1445  extern void compute_b_moduli(double *bm, int k, int order);
1446  compute_b_moduli(bm1, k1, order);
1447  compute_b_moduli(bm2, k2, order);
1448  compute_b_moduli(bm3, k3, order);
1449 
1450  // allocate space for and copy prefactors onto GPU
1451  float *bm1f = new float[k1];
1452  float *bm2f = new float[k2];
1453  float *bm3f = new float[k3];
1454  for (int i=0; i < k1; i++) bm1f[i] = (float) bm1[i];
1455  for (int i=0; i < k2; i++) bm2f[i] = (float) bm2[i];
1456  for (int i=0; i < k3; i++) bm3f[i] = (float) bm3[i];
1457  allocate_device<float>(&d_bm1, k1);
1458  allocate_device<float>(&d_bm2, k2);
1459  allocate_device<float>(&d_bm3, k3);
1460  copy_HtoD_sync<float>(bm1f, d_bm1, k1);
1461  copy_HtoD_sync<float>(bm2f, d_bm2, k2);
1462  copy_HtoD_sync<float>(bm3f, d_bm3, k3);
1463  delete [] bm1f;
1464  delete [] bm2f;
1465  delete [] bm3f;
1466  delete [] bm1;
1467  delete [] bm2;
1468  delete [] bm3;
1469 
1470  cudaCheck(cudaStreamSynchronize(stream));
1471 
1472 // fprintf(stderr, "CudaPmeOneDevice constructor END ********************************************\n");
1473 }
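The gridsize padding in the texture setup above rounds the per-grid length up to a multiple of cudaDeviceProp::textureAlignment, so that each d_grids + iGrid * gridsize offset is an acceptable base address for cudaCreateTextureObject() with a linear resource descriptor. The same round-up written as a standalone helper (a sketch, not part of the class):

 // Round a buffer length up to the next multiple of the device texture
 // alignment (mirrors the padding done in the constructor above).
 size_t padToTextureAlignment(size_t n, size_t textureAlignment) {
   if (n % textureAlignment != 0) {
     n = (n / textureAlignment + 1) * textureAlignment;
   }
   return n;
 }
 // e.g. a length of 1000 with alignment 512 pads to 1024.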

◆ ~CudaPmeOneDevice()

CudaPmeOneDevice::~CudaPmeOneDevice ( )

Definition at line 1475 of file CudaPmeSolverUtil.C.

References backwardPlans, cudaCheck, cufftCheck, d_atoms, d_bm1, d_bm2, d_bm3, d_energyVirials, d_forces, d_grids, d_partition, PatchLevelPmeData::d_patchGridOffsets, d_scaling_factors, d_selfEnergy, d_selfEnergy_FEP, d_selfEnergy_TI_1, d_selfEnergy_TI_2, d_trans, forwardPlans, gridTexObjArrays, h_energyVirials, PatchLevelPmeData::h_patchGridOffsets, num_used_grids, patchLevelPmeData, and stream.

1475  {
1476  deallocate_device<float4>(&d_atoms);
1477  deallocate_device<float3>(&d_forces);
1478  deallocate_device<float2>(&d_trans);
1479  deallocate_device<float>(&d_grids);
1480  deallocate_host<EnergyVirial>(&h_energyVirials);
1481  deallocate_device<EnergyVirial>(&d_energyVirials);
1482  deallocate_device<float>(&d_scaling_factors);
1483 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
1484  for (size_t iGrid = 0; iGrid < num_used_grids; ++iGrid) {
1485  cufftCheck(cufftDestroy(forwardPlans[iGrid]));
1486  cufftCheck(cufftDestroy(backwardPlans[iGrid]));
1487 #if defined(NAMD_CUDA) // only CUDA uses texture objects
1488  cudaCheck(cudaDestroyTextureObject(gridTexObjArrays[iGrid]));
1489 #endif
1490  }
1491 
1492  if (patchLevelPmeData.h_patchGridOffsets != nullptr) {
1493  deallocate_host<int3>(&patchLevelPmeData.h_patchGridOffsets);
1494  }
1495  if (patchLevelPmeData.d_patchGridOffsets != nullptr) {
1496  deallocate_device<int3>(&patchLevelPmeData.d_patchGridOffsets);
1497  }
1498 
1499  delete[] forwardPlans;
1500  delete[] backwardPlans;
1501 #if defined(NAMD_CUDA) // only CUDA uses texture objects
1502  delete[] gridTexObjArrays;
1503 #endif
1504 
1505 
1506 #endif
1507  deallocate_device<double>(&d_selfEnergy);
1508  if (d_partition != NULL) deallocate_device<int>(&d_partition);
1509  if (d_selfEnergy_FEP != NULL) deallocate_device<double>(&d_selfEnergy_FEP);
1510  if (d_selfEnergy_TI_1 != NULL) deallocate_device<double>(&d_selfEnergy_TI_1);
1511  if (d_selfEnergy_TI_2 != NULL) deallocate_device<double>(&d_selfEnergy_TI_2);
1512  deallocate_device<float>(&d_bm1);
1513  deallocate_device<float>(&d_bm2);
1514  deallocate_device<float>(&d_bm3);
1515  cudaCheck(cudaStreamDestroy(stream));
1516 
1517  if (reductionGpuResident) {
1518  delete reductionGpuResident;
1519  }
1520 }

Member Function Documentation

◆ checkPatchLevelDeviceCompatibility()

void CudaPmeOneDevice::checkPatchLevelDeviceCompatibility ( )

Definition at line 2045 of file CudaPmeSolverUtil.C.

References computeSharedMemoryPatchLevelGatherForce(), computeSharedMemoryPatchLevelSpreadCharge(), PatchLevelPmeData::deviceCompatible, deviceID, PatchLevelPmeData::deviceMaxSharedBytes, PatchLevelPmeData::gatherForceSharedBytes, PatchLevelPmeData::kNumThreads, PatchLevelPmeData::kPatchGridDim, PatchLevelPmeData::kPatchGridDimPad, patchLevelPmeData, and PatchLevelPmeData::spreadChargeSharedBytes.

Referenced by CudaPmeOneDevice().

2045  {
2046  cudaDeviceGetAttribute(&patchLevelPmeData.deviceMaxSharedBytes, cudaDevAttrMaxSharedMemoryPerBlockOptin, deviceID);
2047 
2048  const int3 constexprPatchGridDim = make_int3(
2052 
2055  constexprPatchGridDim, 8 /* order */);
2058  constexprPatchGridDim, 8 /* order */);
2059 
2063 }
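Several statements in this listing collapse into cross-reference links in the extracted text. Based on the members referenced above, a hedged sketch of what the elided body computes follows; the argument order of the grid-dimension constants and the final compatibility test are assumptions, not the verbatim source:

 // Sketch only: the elided body presumably resembles the following.
 cudaDeviceGetAttribute(&patchLevelPmeData.deviceMaxSharedBytes,
                        cudaDevAttrMaxSharedMemoryPerBlockOptin, deviceID);

 const int3 patchGridDim = make_int3(PatchLevelPmeData::kPatchGridDimPad,   // order assumed
                                     PatchLevelPmeData::kPatchGridDim,
                                     PatchLevelPmeData::kPatchGridDim);

 patchLevelPmeData.spreadChargeSharedBytes = computeSharedMemoryPatchLevelSpreadCharge(
     PatchLevelPmeData::kNumThreads, patchGridDim, 8 /* order */);
 patchLevelPmeData.gatherForceSharedBytes = computeSharedMemoryPatchLevelGatherForce(
     PatchLevelPmeData::kNumThreads, patchGridDim, 8 /* order */);

 // Assumed final check: both kernels must fit in the opt-in shared-memory limit.
 patchLevelPmeData.deviceCompatible =
     (patchLevelPmeData.spreadChargeSharedBytes <= patchLevelPmeData.deviceMaxSharedBytes) &&
     (patchLevelPmeData.gatherForceSharedBytes  <= patchLevelPmeData.deviceMaxSharedBytes);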

◆ checkPatchLevelLatticeCompatibilityAndComputeOffsets()

void CudaPmeOneDevice::checkPatchLevelLatticeCompatibilityAndComputeOffsets ( const Lattice lattice,
const int  numPatches,
const CudaLocalRecord localRecords,
double3 *  patchMin,
double3 *  patchMax,
double3 *  awayDists 
)

Definition at line 2065 of file CudaPmeSolverUtil.C.

References Lattice::a(), Lattice::a_r(), Lattice::b(), Lattice::b_r(), Lattice::c(), Lattice::c_r(), currentLattice, PatchLevelPmeData::d_patchGridOffsets, PatchLevelPmeData::deviceCompatible, getShiftedGrid(), PatchLevelPmeData::h_patchGridOffsets, Lattice::isEqual(), PmeGrid::K1, PmeGrid::K2, PmeGrid::K3, PatchLevelPmeData::kPatchGridDim, PatchLevelPmeData::latticeCompatible, PatchLevelPmeData::localRecords, PatchLevelPmeData::numPatches, Node::Object(), PmeGrid::order, order, PatchLevelPmeData::patchGridDim, patchLevelPmeData, pmeGrid, Lattice::scale(), Node::simParameters, simParams, PatchLevelPmeData::simulationCompatible, Vector::unit(), Lattice::unscale(), and Vector::x.

2067  {
2068 
2069  patchLevelPmeData.localRecords = localRecords;
2070 
2071  // If the simulation isn't compatible or the device isn't compatible then no point in checking
2072  // patch sizes
2074 
2075  patchLevelPmeData.numPatches = numPatches;
2076 
2077  if (patchLevelPmeData.h_patchGridOffsets == nullptr) {
2078  allocate_host<int3>(&patchLevelPmeData.h_patchGridOffsets, numPatches);
2079  }
2080  if (patchLevelPmeData.d_patchGridOffsets == nullptr) {
2081  allocate_device<int3>(&patchLevelPmeData.d_patchGridOffsets, numPatches);
2082  }
2083 
2085  const int order = pmeGrid.order;
2086 
2087  // We only need to recompute the grid offsets if the lattice has changed
2088  if (!lattice.isEqual(currentLattice)) {
2089  currentLattice = lattice;
2090 
2091  double sysdima = currentLattice.a_r().unit() * currentLattice.a();
2092  double sysdimb = currentLattice.b_r().unit() * currentLattice.b();
2093  double sysdimc = currentLattice.c_r().unit() * currentLattice.c();
2094 
2095  patchLevelPmeData.patchGridDim = make_int3(0,0,0);
2096 
2097  for (int i = 0; i < numPatches; i++) {
2098  double3 pmin = currentLattice.unscale(patchMin[i]);
2099  double3 pmax = currentLattice.unscale(patchMax[i]);
2100  double3 width = pmax - pmin;
2101 
2102  // Logic copied from margin violation check
2103  double3 marginVal;
2104  marginVal.x = 0.5 * (awayDists[i].x - simParams->cutoff / sysdima);
2105  marginVal.y = 0.5 * (awayDists[i].y - simParams->cutoff / sysdimb);
2106  marginVal.z = 0.5 * (awayDists[i].z - simParams->cutoff / sysdimc);
2107  marginVal = currentLattice.unscale(marginVal);
2108 
2109  double3 minAtom = pmin - marginVal;
2110  double3 maxAtom = pmax + marginVal;
2111 
2112  double3 minScaled = currentLattice.scale(minAtom);
2113  double3 maxScaled = currentLattice.scale(maxAtom);
2114 
2115  int3 gridMin;
2116  gridMin.x = getShiftedGrid(minScaled.x, pmeGrid.K1);
2117  gridMin.y = getShiftedGrid(minScaled.y, pmeGrid.K2);
2118  gridMin.z = getShiftedGrid(minScaled.z, pmeGrid.K3);
2119 
2120  int3 gridMax;
2121  gridMax.x = getShiftedGrid(maxScaled.x, pmeGrid.K1);
2122  gridMax.y = getShiftedGrid(maxScaled.y, pmeGrid.K2);
2123  gridMax.z = getShiftedGrid(maxScaled.z, pmeGrid.K3);
2124 
2125  int3 gridWidth;
2126  gridWidth.x = gridMax.x - gridMin.x + order;
2127  gridWidth.y = gridMax.y - gridMin.y + order;
2128  gridWidth.z = gridMax.z - gridMin.z + order;
2129 
2131  patchLevelPmeData.patchGridDim.x = std::max(patchLevelPmeData.patchGridDim.x, gridWidth.x);
2132  patchLevelPmeData.patchGridDim.y = std::max(patchLevelPmeData.patchGridDim.y, gridWidth.y);
2133  patchLevelPmeData.patchGridDim.z = std::max(patchLevelPmeData.patchGridDim.z, gridWidth.z);
2134  }
2136  numPatches, nullptr);
2137  cudaStreamSynchronize(nullptr);
2138  const int maxGridPoints = patchLevelPmeData.patchGridDim.x *
2140 
2145  }
2146 }

◆ checkPatchLevelSimParamCompatibility()

void CudaPmeOneDevice::checkPatchLevelSimParamCompatibility ( const int  order,
const bool  periodicY,
const bool  periodicZ 
)

Definition at line 2034 of file CudaPmeSolverUtil.C.

References deviceCUDA, DeviceCUDA::getNumDevice(), order, patchLevelPmeData, and PatchLevelPmeData::simulationCompatible.

Referenced by CudaPmeOneDevice().

2034  {
2035  bool use = true;
2036  use = use && (order == 8);
2037  use = use && (periodicY);
2038  use = use && (periodicZ);
2039 
2040  use = use && (deviceCUDA->getNumDevice() == 1); // This is only supported for single GPU currently
2041 
2043 }

◆ compute()

void CudaPmeOneDevice::compute ( const Lattice lattice,
int  doEnergyVirial,
int  step 
)

Definition at line 1522 of file CudaPmeSolverUtil.C.

References Lattice::a_r(), SimParameters::alchOn, Lattice::b_r(), backwardPlans, Lattice::c_r(), compute_selfEnergy(), cudaCheck, cufftCheck, d_atoms, d_bm1, d_bm2, d_bm3, d_energyVirials, d_forces, d_grids, d_selfEnergy, d_trans, deviceID, SimParameters::firstTimestep, forwardPlans, gather_force(), gridsize, gridTexObjArrays, h_energyVirials, PmeGrid::K1, PmeGrid::K2, PmeGrid::K3, kappa, m_step, natoms, num_used_grids, Node::Object(), PmeGrid::order, order, patchLevelPmeData, pmeGrid, scalar_sum(), selfEnergy, Node::simParameters, spread_charge(), spread_charge_v2(), stream, transize, Lattice::volume(), WARPSIZE, Vector::x, Vector::y, and Vector::z.

1533  {
1534 // fprintf(stderr, "CudaPmeOneDevice compute ****************************************************\n");
1535  int k1 = pmeGrid.K1;
1536  int k2 = pmeGrid.K2;
1537  int k3 = pmeGrid.K3;
1538  int order = pmeGrid.order;
1539  double volume = lattice.volume();
1540  Vector a_r = lattice.a_r();
1541  Vector b_r = lattice.b_r();
1542  Vector c_r = lattice.c_r();
1543  float arx = a_r.x;
1544  float ary = a_r.y;
1545  float arz = a_r.z;
1546  float brx = b_r.x;
1547  float bry = b_r.y;
1548  float brz = b_r.z;
1549  float crx = c_r.x;
1550  float cry = c_r.y;
1551  float crz = c_r.z;
1552  m_step = step;
1553 
1554  //JM: actually necessary if you reserve a PME device!
1555  cudaCheck(cudaSetDevice(deviceID));
1556  const SimParameters& sim_params = *(Node::Object()->simParameters);
1557 
1558  // clear force array
1559  //fprintf(stderr, "Calling clear_device_array on d_force\n");
1560  clear_device_array<float3>(d_forces, num_used_grids * natoms, stream);
1561  // clear grid
1562  //fprintf(stderr, "Calling clear_device_array on d_grid\n");
1563  clear_device_array<float>(d_grids, num_used_grids * gridsize, stream);
1564  clear_device_array<float2>(d_trans, num_used_grids * transize, stream);
1565 
1566  // Clear energy and virial array if needed
1567  if (doEnergyVirial) {
1568  // clear_device_array<EnergyVirial>(d_energyVirial, 1, stream);
1569  clear_device_array<EnergyVirial>(d_energyVirials, num_used_grids * 1, stream);
1570  const bool updateSelfEnergy = (step == sim_params.firstTimestep) || (selfEnergy == 0);
1571  if (updateSelfEnergy && (sim_params.alchOn == false)) {
1572  clear_device_array<double>(d_selfEnergy, 1, stream);
1573  // calculate self energy term if not yet done
1575  kappa, stream);
1576  //fprintf(stderr, "selfEnergy = %12.8f\n", selfEnergy);
1577  }
1578  /* the self energy depends on the scaling factor, or lambda
1579  * the cases when self energy will be changed:
1580  * 1. If alchLambdaFreq > 0, we will have a linear scaling of lambda. Lambda is changed EVERY STEP!
1581  * 2. In most cases, users will not use alchLambdaFreq > 0, but simulations may enter another lambda-window by using TCL scripts.
1582  * in summary, the self energy will be not changed unless lambda is changed.
1583  * so calcSelfEnergyAlch() would compare lambda of current step with the one from last step.
1584  * only if lambda is changed, the calcSelfEnergyFEPKernel or calcSelfEnergyTIKernel will be executed again.
1585  */
1586  if (sim_params.alchOn) calcSelfEnergyAlch(m_step);
1587  }
1588 
1589 #if 0
1590 
1591  spread_charge(d_atoms, natoms, k1, k2, k3, k1, k2, k3,
1592  k1 /* xsize */, 0 /* jBlock */, 0 /* kBlock */,
1593  true /* pmeGrid.yBlocks == 1 */, true /* pmeGrid.zBlocks == 1 */,
1594  d_grid, order, stream);
1595 #else
1596  const int order3 = ((order*order*order-1)/WARPSIZE + 1)*WARPSIZE;
1597  for (size_t iGrid = 0; iGrid < num_used_grids; ++iGrid) {
1599  d_atoms + iGrid * natoms, natoms, k1, k2, k3,
1600  float(k1), (float)k2, (float)k3, order3,
1601  k1, k2, k3,
1602  k1 /* xsize */, 0 /* jBlock */, 0 /* kBlock */,
1603  true /* pmeGrid.yBlocks == 1 */, true /* pmeGrid.zBlocks == 1 */,
1604  d_grids + iGrid * gridsize, order, stream);
1605  }
1606 
1607 #endif
1608  //cudaCheck(cudaStreamSynchronize(stream));
1609 
1610  // forward FFT
1611  //fprintf(stderr, "Calling cufftExecR2C\n");
1612  //cufftCheck(cufftExecR2C(forwardPlan, (cufftReal *)d_grid,
1613  // (cufftComplex *)d_tran));
1614 
1615  for (size_t iGrid = 0; iGrid < num_used_grids; ++iGrid) {
1616  cufftCheck(cufftExecR2C(forwardPlans[iGrid],
1617  (cufftReal *)(d_grids + iGrid * gridsize),
1618  (cufftComplex *)(d_trans + iGrid * transize)));
1619  }
1620 
1621  //cudaCheck(cudaStreamSynchronize(stream));
1622 
1623  // reciprocal space calculation
1624  //fprintf(stderr, "Calling scalar_sum\n");
1625  for (size_t iGrid = 0; iGrid < num_used_grids; ++iGrid) {
1626  scalar_sum(true /* Perm_cX_Y_Z */, k1, k2, k3, (k1/2 + 1), k2, k3,
1627  kappa, arx, ary, arz, brx, bry, brz, crx, cry, crz, volume,
1628  d_bm1, d_bm2, d_bm3, 0 /* jBlock */, 0 /* kBlock */,
1629  (bool) doEnergyVirial, &(d_energyVirials[iGrid].energy),
1630  d_energyVirials[iGrid].virial, d_trans + iGrid * transize, stream);
1631  }
1632  //scalar_sum(true /* Perm_cX_Y_Z */, k1, k2, k3, (k1/2 + 1), k2, k3,
1633  // kappa, arx, ary, arz, brx, bry, brz, crx, cry, crz, volume,
1634  // d_bm1, d_bm2, d_bm3, 0 /* jBlock */, 0 /* kBlock */,
1635  // (bool) doEnergyVirial, &(d_energyVirial->energy),
1636  // d_energyVirial->virial, d_tran, stream);
1637  //cudaCheck(cudaStreamSynchronize(stream));
1638 
1639  // backward FFT
1640  //fprintf(stderr, "Calling cufftExecC2R\n");
1641  for (size_t iGrid = 0; iGrid < num_used_grids; ++iGrid) {
1642  cufftCheck(cufftExecC2R(backwardPlans[iGrid],
1643  (cufftComplex *)(d_trans + iGrid * transize),
1644  (cufftReal *)(d_grids + iGrid * gridsize)));
1645  }
1646 
1647  //cufftCheck(cufftExecC2R(backwardPlan, (cufftComplex *)d_tran,
1648  // (cufftReal *)d_grid));
1649  //cudaCheck(cudaStreamSynchronize(stream));
1650 
1651  // gather force from grid to atoms
1652  // missing cudaTextureObject_t below works for __CUDA_ARCH__ >= 350
1653  //fprintf(stderr, "Calling gather_force\n");
1654  for (unsigned int iGrid = 0; iGrid < num_used_grids; ++iGrid) {
1656  &(d_atoms[iGrid * natoms]), natoms, k1, k2, k3, k1, k2, k3,
1657  k1 /* xsize */, 0 /* jBlock */, 0 /* kBlock */,
1658  true /* pmeGrid.yBlocks == 1 */, true /* pmeGrid.zBlocks == 1 */,
1659  d_grids + iGrid * gridsize, order, d_forces + iGrid * natoms,
1660 #ifdef NAMD_CUDA
1661  gridTexObjArrays[iGrid] /* cudaTextureObject_t */,
1662 #endif
1663  stream);
1664  }
1665 
1666  //gather_force(d_atoms, natoms, k1, k2, k3, k1, k2, k3,
1667  // k1 /* xsize */, 0 /* jBlock */, 0 /* kBlock */,
1668  // true /* pmeGrid.yBlocks == 1 */, true /* pmeGrid.zBlocks == 1 */,
1669  // d_grid, order, d_force, gridTexObj /* cudaTextureObject_t */,
1670  // stream);
1671  //cudaCheck(cudaStreamSynchronize(stream));
1672 
1673  // Copy energy and virial to host if needed
1674  if (doEnergyVirial) {
1675  //fprintf(stderr, "Calling copy_DtoH on d_energyVirial\n");
1676  copy_DtoH<EnergyVirial>(d_energyVirials, h_energyVirials,
1678  //cudaCheck(cudaEventRecord(copyEnergyVirialEvent, stream));
1679  //cudaCheck(cudaStreamSynchronize(stream));
1680  }
1681 
1682  // XXX debugging, quick test for borked forces
1683  //clear_device_array<float3>(d_force, natoms, stream);
1684  if (sim_params.alchOn) {
1685  scaleAndMergeForce(m_step);
1686  }
1687 }

◆ computeSharedMemoryPatchLevelGatherForce()

int CudaPmeOneDevice::computeSharedMemoryPatchLevelGatherForce ( const int  numThreads,
const int3  patchGridDim,
const int  order 
)

Definition at line 2024 of file CudaPmeSolverUtil.C.

References PatchLevelPmeData::kThetaPad, and order.

Referenced by checkPatchLevelDeviceCompatibility().

2025  {
2026 
2027  const int gridBytes = patchGridDim.x * patchGridDim.y * patchGridDim.z * sizeof(float);
2028  const int thetaBytes = (numThreads + PatchLevelPmeData::kThetaPad) * order *
2029  2 /* theta and dtheta */ * sizeof(float);
2030 
2031  return gridBytes + thetaBytes;
2032 }

◆ computeSharedMemoryPatchLevelSpreadCharge()

int CudaPmeOneDevice::computeSharedMemoryPatchLevelSpreadCharge ( const int  numThreads,
const int3  patchGridDim,
const int  order 
)

Definition at line 2013 of file CudaPmeSolverUtil.C.

References PatchLevelPmeData::kDim, PatchLevelPmeData::kThetaPad, and order.

Referenced by checkPatchLevelDeviceCompatibility().

2014  {
2015 
2016  const int gridBytes = patchGridDim.x * patchGridDim.y * patchGridDim.z * sizeof(float);
2017  const int thetaBytes = PatchLevelPmeData::kDim * (numThreads + PatchLevelPmeData::kThetaPad) *
2018  order * sizeof(float);
2019  const int indexBytes = numThreads * sizeof(char4);
2020 
2021  return gridBytes + thetaBytes + indexBytes;
2022 }
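Both shared-memory estimates above combine the patch-local grid footprint with per-thread B-spline (theta) storage. For reference, the same arithmetic written as standalone functions; kDim and kThetaPad are taken as parameters here because their constexpr values live in PatchLevelPmeData and are not shown in these listings:

 // Sketch of the shared-memory sizing used by the two member functions above.
 int spreadChargeSharedBytes(int numThreads, int3 patchGridDim, int order,
                             int kDim, int kThetaPad) {
   const int gridBytes  = patchGridDim.x * patchGridDim.y * patchGridDim.z * sizeof(float);
   const int thetaBytes = kDim * (numThreads + kThetaPad) * order * sizeof(float);
   const int indexBytes = numThreads * sizeof(char4);
   return gridBytes + thetaBytes + indexBytes;
 }

 int gatherForceSharedBytes(int numThreads, int3 patchGridDim, int order,
                            int kThetaPad) {
   const int gridBytes  = patchGridDim.x * patchGridDim.y * patchGridDim.z * sizeof(float);
   const int thetaBytes = (numThreads + kThetaPad) * order *
                          2 /* theta and dtheta */ * sizeof(float);
   return gridBytes + thetaBytes;
 }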

◆ finishReduction()

void CudaPmeOneDevice::finishReduction ( bool  doEnergyVirial)

Definition at line 1691 of file CudaPmeSolverUtil.C.

References SimParameters::alchFepOn, SimParameters::alchOn, SimParameters::alchThermIntOn, cudaCheck, deviceID, CudaPmeOneDevice::EnergyVirial::energy, h_energyVirials, SubmitReduction::item(), m_step, Node::Object(), REDUCTION_ELECT_ENERGY_SLOW, REDUCTION_ELECT_ENERGY_SLOW_F, REDUCTION_ELECT_ENERGY_SLOW_TI_1, REDUCTION_ELECT_ENERGY_SLOW_TI_2, selfEnergy, selfEnergy_FEP, selfEnergy_TI_1, selfEnergy_TI_2, Node::simParameters, stream, SubmitReduction::submit(), and CudaPmeOneDevice::EnergyVirial::virial.

1693  {
1694  cudaCheck(cudaStreamSynchronize(stream));
1695  SubmitReduction *reduction = getCurrentReduction();
1696  if(doEnergyVirial){
1697  CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
1698  PatchData* patchData = cpdata.ckLocalBranch();
1699  cudaCheck(cudaSetDevice(deviceID));
1700  double virial[9];
1701  double energy, energy_F, energy_TI_1, energy_TI_2;
1702  const SimParameters& sim_params = *(Node::Object()->simParameters);
1703  if (sim_params.alchOn) {
1704  if (sim_params.alchFepOn) {
1705  scaleAndComputeFEPEnergyVirials(h_energyVirials, m_step, energy, energy_F, virial);
1706  energy += selfEnergy;
1707  energy_F += selfEnergy_FEP;
1708  }
1709  if (sim_params.alchThermIntOn) {
1710  scaleAndComputeTIEnergyVirials(h_energyVirials, m_step, energy, energy_TI_1, energy_TI_2, virial);
1711  energy += selfEnergy;
1712  energy_TI_1 += selfEnergy_TI_1;
1713  energy_TI_2 += selfEnergy_TI_2;
1714  }
1715  } else {
1716  virial[0] = h_energyVirials[0].virial[0];
1717  virial[1] = h_energyVirials[0].virial[1];
1718  virial[2] = h_energyVirials[0].virial[2];
1719  virial[3] = h_energyVirials[0].virial[1];
1720  virial[4] = h_energyVirials[0].virial[3];
1721  virial[5] = h_energyVirials[0].virial[4];
1722  virial[6] = h_energyVirials[0].virial[2];
1723  virial[7] = h_energyVirials[0].virial[4];
1724  virial[8] = h_energyVirials[0].virial[5];
1725  energy = h_energyVirials[0].energy + selfEnergy;
1726  }
1727  #if 0
1728  fprintf(stderr, "PME ENERGY = %g %g\n", h_energyVirials[0].energy, selfEnergy );
1729  fprintf(stderr, "PME VIRIAL =\n"
1730  " %g %g %g\n %g %g %g\n %g %g %g\n",
1731  virial[0], virial[1], virial[2], virial[3], virial[4],
1732  virial[5], virial[6], virial[7], virial[8]);
1733  #endif
1734  reduction->item(REDUCTION_VIRIAL_SLOW_XX) += virial[0];
1735  reduction->item(REDUCTION_VIRIAL_SLOW_XY) += virial[1];
1736  reduction->item(REDUCTION_VIRIAL_SLOW_XZ) += virial[2];
1737  reduction->item(REDUCTION_VIRIAL_SLOW_YX) += virial[3];
1738  reduction->item(REDUCTION_VIRIAL_SLOW_YY) += virial[4];
1739  reduction->item(REDUCTION_VIRIAL_SLOW_YZ) += virial[5];
1740  reduction->item(REDUCTION_VIRIAL_SLOW_ZX) += virial[6];
1741  reduction->item(REDUCTION_VIRIAL_SLOW_ZY) += virial[7];
1742  reduction->item(REDUCTION_VIRIAL_SLOW_ZZ) += virial[8];
1743  reduction->item(REDUCTION_ELECT_ENERGY_SLOW) += energy;
1744  if (sim_params.alchFepOn) {
1745  reduction->item(REDUCTION_ELECT_ENERGY_SLOW_F) += energy_F;
1746  }
1747  if (sim_params.alchThermIntOn) {
1748  reduction->item(REDUCTION_ELECT_ENERGY_SLOW_TI_1) += energy_TI_1;
1749  reduction->item(REDUCTION_ELECT_ENERGY_SLOW_TI_2) += energy_TI_2;
1750  }
1751  }
1752  reduction->submit();
1753 }
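In the non-alchemical branch above, the six packed components of EnergyVirial::virial are expanded into the full symmetric 3x3 tensor submitted to the reduction; the indexing implies the packed order [xx, xy, xz, yy, yz, zz]. A small standalone sketch of that expansion:

 // Expand a packed upper-triangular virial [xx, xy, xz, yy, yz, zz]
 // into a full symmetric 3x3 tensor stored row-major in virial[9].
 void expandVirial(const double packed[6], double virial[9]) {
   virial[0] = packed[0];  virial[1] = packed[1];  virial[2] = packed[2];
   virial[3] = packed[1];  virial[4] = packed[3];  virial[5] = packed[4];
   virial[6] = packed[2];  virial[7] = packed[4];  virial[8] = packed[5];
 }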

◆ getShiftedGrid()

int CudaPmeOneDevice::getShiftedGrid ( const double  x,
const int  grid 
)

Definition at line 2007 of file CudaPmeSolverUtil.C.

Referenced by checkPatchLevelLatticeCompatibilityAndComputeOffsets().

2007  {
2008  double w = x + 0.5;
2009  double gw = w * grid;
2010  return floor(gw);
2011 }
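
The +0.5 shift moves a scaled fractional coordinate from roughly [-0.5, 0.5) into [0, 1) before scaling by the grid dimension. For example, x = 0.25 on a grid of 64 points maps to floor((0.25 + 0.5) * 64) = 48, while x = -0.5 maps to 0.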

Member Data Documentation

◆ backwardPlans

cufftHandle* CudaPmeOneDevice::backwardPlans

Definition at line 238 of file CudaPmeSolverUtil.h.

Referenced by compute(), CudaPmeOneDevice(), and ~CudaPmeOneDevice().

◆ currentLattice

Lattice CudaPmeOneDevice::currentLattice

◆ d_atoms

float4* CudaPmeOneDevice::d_atoms

Definition at line 219 of file CudaPmeSolverUtil.h.

Referenced by compute(), CudaPmeOneDevice(), and ~CudaPmeOneDevice().

◆ d_bm1

float* CudaPmeOneDevice::d_bm1

Definition at line 241 of file CudaPmeSolverUtil.h.

Referenced by compute(), CudaPmeOneDevice(), and ~CudaPmeOneDevice().

◆ d_bm2

float* CudaPmeOneDevice::d_bm2

Definition at line 242 of file CudaPmeSolverUtil.h.

Referenced by compute(), CudaPmeOneDevice(), and ~CudaPmeOneDevice().

◆ d_bm3

float* CudaPmeOneDevice::d_bm3

Definition at line 243 of file CudaPmeSolverUtil.h.

Referenced by compute(), CudaPmeOneDevice(), and ~CudaPmeOneDevice().

◆ d_energyVirials

EnergyVirial* CudaPmeOneDevice::d_energyVirials

Definition at line 251 of file CudaPmeSolverUtil.h.

Referenced by compute(), CudaPmeOneDevice(), and ~CudaPmeOneDevice().

◆ d_forces

float3* CudaPmeOneDevice::d_forces

Definition at line 221 of file CudaPmeSolverUtil.h.

Referenced by compute(), CudaPmeOneDevice(), and ~CudaPmeOneDevice().

◆ d_grids

float* CudaPmeOneDevice::d_grids

on device grid of charge before forward FFT R->C, then grid of potential after backward FFT C->R

Definition at line 227 of file CudaPmeSolverUtil.h.

Referenced by compute(), CudaPmeOneDevice(), and ~CudaPmeOneDevice().

◆ d_partition

int* CudaPmeOneDevice::d_partition

Definition at line 220 of file CudaPmeSolverUtil.h.

Referenced by CudaPmeOneDevice(), and ~CudaPmeOneDevice().

◆ d_scaling_factors

float* CudaPmeOneDevice::d_scaling_factors

Definition at line 222 of file CudaPmeSolverUtil.h.

Referenced by CudaPmeOneDevice(), and ~CudaPmeOneDevice().

◆ d_selfEnergy

double* CudaPmeOneDevice::d_selfEnergy

Definition at line 256 of file CudaPmeSolverUtil.h.

Referenced by compute(), CudaPmeOneDevice(), and ~CudaPmeOneDevice().

◆ d_selfEnergy_FEP

double* CudaPmeOneDevice::d_selfEnergy_FEP

Definition at line 257 of file CudaPmeSolverUtil.h.

Referenced by CudaPmeOneDevice(), and ~CudaPmeOneDevice().

◆ d_selfEnergy_TI_1

double* CudaPmeOneDevice::d_selfEnergy_TI_1

Definition at line 258 of file CudaPmeSolverUtil.h.

Referenced by CudaPmeOneDevice(), and ~CudaPmeOneDevice().

◆ d_selfEnergy_TI_2

double* CudaPmeOneDevice::d_selfEnergy_TI_2

Definition at line 259 of file CudaPmeSolverUtil.h.

Referenced by CudaPmeOneDevice(), and ~CudaPmeOneDevice().

◆ d_trans

float2* CudaPmeOneDevice::d_trans

on device FFT transformation to complex

Definition at line 231 of file CudaPmeSolverUtil.h.

Referenced by compute(), CudaPmeOneDevice(), and ~CudaPmeOneDevice().

◆ deviceID

int CudaPmeOneDevice::deviceID

◆ deviceIndex

int CudaPmeOneDevice::deviceIndex

Definition at line 213 of file CudaPmeSolverUtil.h.

Referenced by CudaPmeOneDevice().

◆ force_scaling_alch_first_time

bool CudaPmeOneDevice::force_scaling_alch_first_time

Definition at line 255 of file CudaPmeSolverUtil.h.

◆ forwardPlans

cufftHandle* CudaPmeOneDevice::forwardPlans

Definition at line 237 of file CudaPmeSolverUtil.h.

Referenced by compute(), CudaPmeOneDevice(), and ~CudaPmeOneDevice().

◆ gridsize

size_t CudaPmeOneDevice::gridsize

Definition at line 233 of file CudaPmeSolverUtil.h.

Referenced by compute(), and CudaPmeOneDevice().

◆ gridTexObjArrays

cudaTextureObject_t* CudaPmeOneDevice::gridTexObjArrays

Definition at line 224 of file CudaPmeSolverUtil.h.

Referenced by compute(), CudaPmeOneDevice(), and ~CudaPmeOneDevice().

◆ h_energyVirials

EnergyVirial* CudaPmeOneDevice::h_energyVirials

Definition at line 252 of file CudaPmeSolverUtil.h.

Referenced by compute(), CudaPmeOneDevice(), finishReduction(), and ~CudaPmeOneDevice().

◆ kappa

double CudaPmeOneDevice::kappa

Definition at line 245 of file CudaPmeSolverUtil.h.

Referenced by compute().

◆ m_step

int CudaPmeOneDevice::m_step

Definition at line 264 of file CudaPmeSolverUtil.h.

Referenced by compute(), and finishReduction().

◆ natoms

int CudaPmeOneDevice::natoms

Definition at line 216 of file CudaPmeSolverUtil.h.

Referenced by compute(), and CudaPmeOneDevice().

◆ num_used_grids

size_t CudaPmeOneDevice::num_used_grids

Definition at line 217 of file CudaPmeSolverUtil.h.

Referenced by compute(), CudaPmeOneDevice(), and ~CudaPmeOneDevice().

◆ patchLevelPmeData

PatchLevelPmeData CudaPmeOneDevice::patchLevelPmeData

◆ pmeGrid

PmeGrid CudaPmeOneDevice::pmeGrid

◆ self_energy_alch_first_time

bool CudaPmeOneDevice::self_energy_alch_first_time

Definition at line 254 of file CudaPmeSolverUtil.h.

◆ selfEnergy

double CudaPmeOneDevice::selfEnergy

Definition at line 260 of file CudaPmeSolverUtil.h.

Referenced by compute(), and finishReduction().

◆ selfEnergy_FEP

double CudaPmeOneDevice::selfEnergy_FEP

Definition at line 261 of file CudaPmeSolverUtil.h.

Referenced by finishReduction().

◆ selfEnergy_TI_1

double CudaPmeOneDevice::selfEnergy_TI_1

Definition at line 262 of file CudaPmeSolverUtil.h.

Referenced by finishReduction().

◆ selfEnergy_TI_2

double CudaPmeOneDevice::selfEnergy_TI_2

Definition at line 263 of file CudaPmeSolverUtil.h.

Referenced by finishReduction().

◆ stream

cudaStream_t CudaPmeOneDevice::stream

Definition at line 214 of file CudaPmeSolverUtil.h.

Referenced by compute(), CudaPmeOneDevice(), finishReduction(), and ~CudaPmeOneDevice().

◆ transize

size_t CudaPmeOneDevice::transize

Definition at line 234 of file CudaPmeSolverUtil.h.

Referenced by compute(), and CudaPmeOneDevice().


The documentation for this class was generated from the following files:

CudaPmeSolverUtil.h
CudaPmeSolverUtil.C