NAMD
Classes | Public Member Functions | List of all members
CudaPmeKSpaceCompute Class Reference

#include <CudaPmeSolverUtil.h>

Inheritance diagram for CudaPmeKSpaceCompute:
PmeKSpaceCompute

Public Member Functions

 CudaPmeKSpaceCompute (PmeGrid pmeGrid, const int permutation, const int jblock, const int kblock, double kappa, int deviceID, cudaStream_t stream, unsigned int iGrid=0)
 
 ~CudaPmeKSpaceCompute ()
 
void solve (Lattice &lattice, const bool doEnergy, const bool doVirial, float *data)
 
void waitEnergyAndVirial ()
 
double getEnergy ()
 
void getVirial (double *virial)
 
void energyAndVirialSetCallback (CudaPmePencilXYZ *pencilPtr)
 
void energyAndVirialSetCallback (CudaPmePencilZ *pencilPtr)
 
- Public Member Functions inherited from PmeKSpaceCompute
 PmeKSpaceCompute (PmeGrid pmeGrid, const int permutation, const int jblock, const int kblock, double kappa, unsigned int multipleGridIndex=0)
 
virtual ~PmeKSpaceCompute ()
 
virtual void setGrid (unsigned int iGrid)
 

Additional Inherited Members

- Protected Attributes inherited from PmeKSpaceCompute
PmeGrid pmeGrid
 
double * bm1
 
double * bm2
 
double * bm3
 
double kappa
 
const int permutation
 
const int jblock
 
const int kblock
 
int size1
 
int size2
 
int size3
 
int j0
 
int k0
 
unsigned int multipleGridIndex
 

Detailed Description

Definition at line 73 of file CudaPmeSolverUtil.h.

Constructor & Destructor Documentation

◆ CudaPmeKSpaceCompute()

CudaPmeKSpaceCompute::CudaPmeKSpaceCompute ( PmeGrid  pmeGrid,
const int  permutation,
const int  jblock,
const int  kblock,
double  kappa,
int  deviceID,
cudaStream_t  stream,
unsigned int  iGrid = 0 
)

Definition at line 236 of file CudaPmeSolverUtil.C.

References PmeKSpaceCompute::bm1, PmeKSpaceCompute::bm2, PmeKSpaceCompute::bm3, cudaCheck, PmeGrid::K1, PmeGrid::K2, PmeGrid::K3, and PmeKSpaceCompute::pmeGrid.

237  :
239  deviceID(deviceID), stream(stream) {
240 
241  cudaCheck(cudaSetDevice(deviceID));
242 
243  // Copy bm1 -> prefac_x on GPU memory
244  float *bm1f = new float[pmeGrid.K1];
245  float *bm2f = new float[pmeGrid.K2];
246  float *bm3f = new float[pmeGrid.K3];
247  for (int i=0;i < pmeGrid.K1;i++) bm1f[i] = (float)bm1[i];
248  for (int i=0;i < pmeGrid.K2;i++) bm2f[i] = (float)bm2[i];
249  for (int i=0;i < pmeGrid.K3;i++) bm3f[i] = (float)bm3[i];
250  allocate_device<float>(&d_bm1, pmeGrid.K1);
251  allocate_device<float>(&d_bm2, pmeGrid.K2);
252  allocate_device<float>(&d_bm3, pmeGrid.K3);
253  copy_HtoD_sync<float>(bm1f, d_bm1, pmeGrid.K1);
254  copy_HtoD_sync<float>(bm2f, d_bm2, pmeGrid.K2);
255  copy_HtoD_sync<float>(bm3f, d_bm3, pmeGrid.K3);
256  delete [] bm1f;
257  delete [] bm2f;
258  delete [] bm3f;
259  allocate_device<EnergyVirial>(&d_energyVirial, 1);
260  allocate_host<EnergyVirial>(&h_energyVirial, 1);
261  // cudaCheck(cudaEventCreateWithFlags(&copyEnergyVirialEvent, cudaEventDisableTiming));
262  cudaCheck(cudaEventCreate(&copyEnergyVirialEvent));
263  // ncall = 0;
264 }
int K2
Definition: PmeBase.h:21
int K1
Definition: PmeBase.h:21
int K3
Definition: PmeBase.h:21
const int permutation
#define cudaCheck(stmt)
Definition: CudaUtils.h:242
PmeKSpaceCompute(PmeGrid pmeGrid, const int permutation, const int jblock, const int kblock, double kappa, unsigned int multipleGridIndex=0)

◆ ~CudaPmeKSpaceCompute()

CudaPmeKSpaceCompute::~CudaPmeKSpaceCompute ( )

Definition at line 266 of file CudaPmeSolverUtil.C.

References cudaCheck.

266  {
267  cudaCheck(cudaSetDevice(deviceID));
268  deallocate_device<float>(&d_bm1);
269  deallocate_device<float>(&d_bm2);
270  deallocate_device<float>(&d_bm3);
271  deallocate_device<EnergyVirial>(&d_energyVirial);
272  deallocate_host<EnergyVirial>(&h_energyVirial);
273  cudaCheck(cudaEventDestroy(copyEnergyVirialEvent));
274 }
#define cudaCheck(stmt)
Definition: CudaUtils.h:242

Member Function Documentation

◆ energyAndVirialSetCallback() [1/2]

void CudaPmeKSpaceCompute::energyAndVirialSetCallback ( CudaPmePencilXYZ pencilPtr)

Definition at line 477 of file CudaPmeSolverUtil.C.

References CcdCallBacksReset(), and cudaCheck.

477  {
478  cudaCheck(cudaSetDevice(deviceID));
479  pencilXYZPtr = pencilPtr;
480  pencilZPtr = NULL;
481  checkCount = 0;
482  CcdCallBacksReset(0, CmiWallTimer());
483  // Set the call back at 0.1ms
484  CcdCallFnAfter(energyAndVirialCheck, this, 0.1);
485 }
#define cudaCheck(stmt)
Definition: CudaUtils.h:242
void CcdCallBacksReset(void *ignored, double curWallTime)

◆ energyAndVirialSetCallback() [2/2]

void CudaPmeKSpaceCompute::energyAndVirialSetCallback ( CudaPmePencilZ pencilPtr)

Definition at line 487 of file CudaPmeSolverUtil.C.

References CcdCallBacksReset(), and cudaCheck.

487  {
488  cudaCheck(cudaSetDevice(deviceID));
489  pencilXYZPtr = NULL;
490  pencilZPtr = pencilPtr;
491  checkCount = 0;
492  CcdCallBacksReset(0, CmiWallTimer());
493  // Set the call back at 0.1ms
494  CcdCallFnAfter(energyAndVirialCheck, this, 0.1);
495 }
#define cudaCheck(stmt)
Definition: CudaUtils.h:242
void CcdCallBacksReset(void *ignored, double curWallTime)

◆ getEnergy()

double CudaPmeKSpaceCompute::getEnergy ( )
virtual

Implements PmeKSpaceCompute.

Definition at line 497 of file CudaPmeSolverUtil.C.

497  {
498  return h_energyVirial->energy;
499 }

◆ getVirial()

void CudaPmeKSpaceCompute::getVirial ( double *  virial)
virtual

Implements PmeKSpaceCompute.

Definition at line 501 of file CudaPmeSolverUtil.C.

References Perm_cX_Y_Z, Perm_Z_cX_Y, and PmeKSpaceCompute::permutation.

501  {
502  if (permutation == Perm_Z_cX_Y) {
503  // h_energyVirial->virial is storing ZZ, ZX, ZY, XX, XY, YY
504  virial[0] = h_energyVirial->virial[3];
505  virial[1] = h_energyVirial->virial[4];
506  virial[2] = h_energyVirial->virial[1];
507 
508  virial[3] = h_energyVirial->virial[4];
509  virial[4] = h_energyVirial->virial[5];
510  virial[5] = h_energyVirial->virial[2];
511 
512  virial[6] = h_energyVirial->virial[1];
513  virial[7] = h_energyVirial->virial[2];
514  virial[8] = h_energyVirial->virial[0];
515  } else if (permutation == Perm_cX_Y_Z) {
516  // h_energyVirial->virial is storing XX, XY, XZ, YY, YZ, ZZ
517  virial[0] = h_energyVirial->virial[0];
518  virial[1] = h_energyVirial->virial[1];
519  virial[2] = h_energyVirial->virial[2];
520 
521  virial[3] = h_energyVirial->virial[1];
522  virial[4] = h_energyVirial->virial[3];
523  virial[5] = h_energyVirial->virial[4];
524 
525  virial[6] = h_energyVirial->virial[2];
526  virial[7] = h_energyVirial->virial[4];
527  virial[8] = h_energyVirial->virial[5];
528  }
529 #if 0
530  fprintf(stderr, "AP PME VIRIAL =\n"
531  " %g %g %g\n %g %g %g\n %g %g %g\n",
532  virial[0], virial[1], virial[2], virial[3], virial[4],
533  virial[5], virial[6], virial[7], virial[8]);
534 #endif
535 }
const int permutation

◆ solve()

void CudaPmeKSpaceCompute::solve ( Lattice lattice,
const bool  doEnergy,
const bool  doVirial,
float *  data 
)
virtual

Implements PmeKSpaceCompute.

Definition at line 276 of file CudaPmeSolverUtil.C.

References Lattice::a(), Lattice::a_r(), Lattice::b(), Lattice::b_r(), PmeKSpaceCompute::bm1, PmeKSpaceCompute::bm2, PmeKSpaceCompute::bm3, Lattice::c(), Lattice::c_r(), cudaCheck, PmeKSpaceCompute::j0, PmeKSpaceCompute::k0, PmeGrid::K1, PmeGrid::K2, PmeGrid::K3, PmeKSpaceCompute::kappa, NAMD_bug(), Perm_cX_Y_Z, Perm_Z_cX_Y, PmeKSpaceCompute::permutation, PmeKSpaceCompute::pmeGrid, scalar_sum(), PmeKSpaceCompute::size1, PmeKSpaceCompute::size2, PmeKSpaceCompute::size3, Lattice::volume(), Vector::x, Vector::y, and Vector::z.

276  {
277 #if 0
278  // Check lattice to make sure it is updating for constant pressure
279  fprintf(stderr, "K-SPACE LATTICE %g %g %g %g %g %g %g %g %g\n",
280  lattice.a().x, lattice.a().y, lattice.a().z,
281  lattice.b().x, lattice.b().y, lattice.b().z,
282  lattice.c().x, lattice.c().y, lattice.c().z);
283 #endif
284  cudaCheck(cudaSetDevice(deviceID));
285 
286  const bool doEnergyVirial = (doEnergy || doVirial);
287 
288  int nfft1, nfft2, nfft3;
289  float *prefac1, *prefac2, *prefac3;
290 
291  BigReal volume = lattice.volume();
292  Vector a_r = lattice.a_r();
293  Vector b_r = lattice.b_r();
294  Vector c_r = lattice.c_r();
295  float recip1x, recip1y, recip1z;
296  float recip2x, recip2y, recip2z;
297  float recip3x, recip3y, recip3z;
298 
299  if (permutation == Perm_Z_cX_Y) {
300  // Z, X, Y
301  nfft1 = pmeGrid.K3;
302  nfft2 = pmeGrid.K1;
303  nfft3 = pmeGrid.K2;
304  prefac1 = d_bm3;
305  prefac2 = d_bm1;
306  prefac3 = d_bm2;
307  recip1x = c_r.z;
308  recip1y = c_r.x;
309  recip1z = c_r.y;
310  recip2x = a_r.z;
311  recip2y = a_r.x;
312  recip2z = a_r.y;
313  recip3x = b_r.z;
314  recip3y = b_r.x;
315  recip3z = b_r.y;
316  } else if (permutation == Perm_cX_Y_Z) {
317  // X, Y, Z
318  nfft1 = pmeGrid.K1;
319  nfft2 = pmeGrid.K2;
320  nfft3 = pmeGrid.K3;
321  prefac1 = d_bm1;
322  prefac2 = d_bm2;
323  prefac3 = d_bm3;
324  recip1x = a_r.x;
325  recip1y = a_r.y;
326  recip1z = a_r.z;
327  recip2x = b_r.x;
328  recip2y = b_r.y;
329  recip2z = b_r.z;
330  recip3x = c_r.x;
331  recip3y = c_r.y;
332  recip3z = c_r.z;
333  } else {
334  NAMD_bug("CudaPmeKSpaceCompute::solve, invalid permutation");
335  }
336 
337  // ncall++;
338  // if (ncall == 1) {
339  // char filename[256];
340  // sprintf(filename,"dataf_%d_%d.txt",jblock,kblock);
341  // writeComplexToDisk((float2*)data, size1*size2*size3, filename, stream);
342  // }
343 
344  // if (ncall == 1) {
345  // float2* h_data = new float2[size1*size2*size3];
346  // float2* d_data = (float2*)data;
347  // copy_DtoH<float2>(d_data, h_data, size1*size2*size3, stream);
348  // cudaCheck(cudaStreamSynchronize(stream));
349  // FILE *handle = fopen("dataf.txt", "w");
350  // for (int z=0;z < pmeGrid.K3;z++) {
351  // for (int y=0;y < pmeGrid.K2;y++) {
352  // for (int x=0;x < pmeGrid.K1/2+1;x++) {
353  // int i;
354  // if (permutation == Perm_cX_Y_Z) {
355  // i = x + y*size1 + z*size1*size2;
356  // } else {
357  // i = z + x*size1 + y*size1*size2;
358  // }
359  // fprintf(handle, "%f %f\n", h_data[i].x, h_data[i].y);
360  // }
361  // }
362  // }
363  // fclose(handle);
364  // delete [] h_data;
365  // }
366 
367  // Clear energy and virial array if needed
368  if (doEnergyVirial) clear_device_array<EnergyVirial>(d_energyVirial, 1, stream);
369 
370 #ifdef TESTPID
371  if (1) {
372  cudaCheck(cudaStreamSynchronize(stream));
373  fprintf(stderr, "AP calling scalar sum\n");
374  fprintf(stderr, "(permutation == Perm_cX_Y_Z) = %s\n",
375  (permutation == Perm_cX_Y_Z ? "true" : "false"));
376  fprintf(stderr, "nfft1=%d nfft2=%d nfft3=%d\n", nfft1, nfft2, nfft3);
377  fprintf(stderr, "size1=%d size2=%d size3=%d\n", size1, size2, size3);
378  fprintf(stderr, "kappa=%g\n", kappa);
379  fprintf(stderr, "recip1x=%g recip1y=%g recip1z=%g\n",
380  (double)recip1x, (double)recip1y, (double)recip1z);
381  fprintf(stderr, "recip2x=%g recip2y=%g recip2z=%g\n",
382  (double)recip2x, (double)recip2y, (double)recip2z);
383  fprintf(stderr, "recip3x=%g recip3y=%g recip3z=%g\n",
384  (double)recip3x, (double)recip3y, (double)recip3z);
385  fprintf(stderr, "volume=%g\n", volume);
386  fprintf(stderr, "j0=%d k0=%d\n", j0, k0);
387  float *bm1, *bm2, *bm3;
388  allocate_host<float>(&bm1, nfft1);
389  allocate_host<float>(&bm2, nfft2);
390  allocate_host<float>(&bm3, nfft3);
391  copy_DtoH<float>(prefac1, bm1, nfft1, stream);
392  copy_DtoH<float>(prefac2, bm2, nfft2, stream);
393  copy_DtoH<float>(prefac3, bm3, nfft3, stream);
394  TestArray_write<float>("bm1_good.bin", "structure factor bm1 good",
395  bm1, nfft1);
396  TestArray_write<float>("bm2_good.bin", "structure factor bm2 good",
397  bm2, nfft2);
398  TestArray_write<float>("bm3_good.bin", "structure factor bm3 good",
399  bm3, nfft3);
400  deallocate_host<float>(&bm1);
401  deallocate_host<float>(&bm2);
402  deallocate_host<float>(&bm3);
403  }
404 #endif
405 
406  scalar_sum(permutation == Perm_cX_Y_Z, nfft1, nfft2, nfft3, size1, size2, size3, kappa,
407  recip1x, recip1y, recip1z, recip2x, recip2y, recip2z, recip3x, recip3y, recip3z,
408  volume, prefac1, prefac2, prefac3, j0, k0, doEnergyVirial,
409  &d_energyVirial->energy, d_energyVirial->virial, (float2*)data,
410  stream);
411 #ifdef TESTPID
412  if (1) {
413  cudaCheck(cudaStreamSynchronize(stream));
414  fprintf(stderr, "AP SCALAR SUM\n");
415  fprintf(stderr, "COPY DEVICE ARRAYS BACK TO HOST\n");
416  int m = 2 * (nfft1/2 + 1) * nfft2 * nfft3;
417  float *tran = 0;
418  allocate_host<float>(&tran, m);
419  copy_DtoH<float>((float*)data, tran, m, stream);
420  cudaCheck(cudaStreamSynchronize(stream));
421  TestArray_write<float>("tran_potential_grid_good.bin",
422  "transformed potential grid good", tran, m);
423  deallocate_host<float>(&tran);
424  }
425 #endif
426 
427  // Copy energy and virial to host if needed
428  if (doEnergyVirial) {
429  copy_DtoH<EnergyVirial>(d_energyVirial, h_energyVirial, 1, stream);
430  cudaCheck(cudaEventRecord(copyEnergyVirialEvent, stream));
431  // cudaCheck(cudaStreamSynchronize(stream));
432  }
433 
434 }
NAMD_HOST_DEVICE Vector c() const
Definition: Lattice.h:270
Definition: Vector.h:72
int K2
Definition: PmeBase.h:21
int K1
Definition: PmeBase.h:21
BigReal z
Definition: Vector.h:74
void scalar_sum(const bool orderXYZ, const int nfft1, const int nfft2, const int nfft3, const int size1, const int size2, const int size3, const double kappa, const float recip1x, const float recip1y, const float recip1z, const float recip2x, const float recip2y, const float recip2z, const float recip3x, const float recip3y, const float recip3z, const double volume, const float *prefac1, const float *prefac2, const float *prefac3, const int k2_00, const int k3_00, const bool doEnergyVirial, double *energy, double *virial, float2 *data, cudaStream_t stream)
void NAMD_bug(const char *err_msg)
Definition: common.C:196
BigReal x
Definition: Vector.h:74
NAMD_HOST_DEVICE BigReal volume(void) const
Definition: Lattice.h:293
NAMD_HOST_DEVICE Vector a_r() const
Definition: Lattice.h:284
NAMD_HOST_DEVICE Vector b_r() const
Definition: Lattice.h:285
NAMD_HOST_DEVICE Vector c_r() const
Definition: Lattice.h:286
NAMD_HOST_DEVICE Vector b() const
Definition: Lattice.h:269
int K3
Definition: PmeBase.h:21
const int permutation
BigReal y
Definition: Vector.h:74
#define cudaCheck(stmt)
Definition: CudaUtils.h:242
NAMD_HOST_DEVICE Vector a() const
Definition: Lattice.h:268
double BigReal
Definition: common.h:123

◆ waitEnergyAndVirial()

void CudaPmeKSpaceCompute::waitEnergyAndVirial ( )

The documentation for this class was generated from the following files: