NAMD
ComputePme Class Reference

#include <ComputePme.h>

Inheritance diagram for ComputePme (diagram omitted): ComputePme derives from Compute and from ComputePmeUtil.

Public Member Functions

 ComputePme (ComputeID c, PatchID pid)
 
virtual ~ComputePme ()
 
void initialize ()
 
void atomUpdate ()
 
int noWork ()
 
void doWork ()
 
void doQMWork ()
 
void ungridForces ()
 
void setMgr (ComputePmeMgr *mgr)
 
- Public Member Functions inherited from Compute
 Compute (ComputeID)
 
int type ()
 
virtual ~Compute ()
 
void setNumPatches (int n)
 
int getNumPatches ()
 
virtual void patchReady (PatchID, int doneMigration, int seq)
 
virtual void finishPatch (int)
 
int sequence (void)
 
int priority (void)
 
int getGBISPhase (void)
 
virtual void gbisP2PatchReady (PatchID, int seq)
 
virtual void gbisP3PatchReady (PatchID, int seq)
 
- Public Member Functions inherited from ComputePmeUtil
 ComputePmeUtil ()
 
 ~ComputePmeUtil ()
 

Friends

class ComputePmeMgr
 

Additional Inherited Members

- Static Public Member Functions inherited from ComputePmeUtil
static void select (void)
 
- Public Attributes inherited from Compute
const ComputeID cid
 
LDObjHandle ldObjHandle
 
LocalWorkMsg *const localWorkMsg
 
- Static Public Attributes inherited from ComputePmeUtil
static int numGrids
 
static Bool alchOn
 
static Bool alchFepOn
 
static Bool alchThermIntOn
 
static Bool alchDecouple
 
static BigReal alchElecLambdaStart
 
static Bool lesOn
 
static int lesFactor
 
static Bool pairOn
 
static Bool selfOn
 
static Bool LJPMEOn
 
- Protected Member Functions inherited from Compute
void enqueueWork ()
 
- Protected Attributes inherited from Compute
int computeType
 
int basePriority
 
int gbisPhase
 
int gbisPhasePriority [3]
 

Detailed Description

Definition at line 48 of file ComputePme.h.

Constructor & Destructor Documentation

◆ ComputePme()

ComputePme::ComputePme ( ComputeID c, PatchID pid )

Definition at line 2712 of file ComputePme.C.

References Compute::basePriority, DebugM, PmeGrid::dim2, PmeGrid::dim3, PmeGrid::K1, PmeGrid::K2, PmeGrid::K3, ComputePmeUtil::numGrids, Node::Object(), PmeGrid::order, PME_PRIORITY, Compute::setNumPatches(), Node::simParameters, and simParams.

2712  : Compute(c), patchID(pid)
2713 {
2714  DebugM(4,"ComputePme created.\n");
2715  basePriority = PME_PRIORITY;
2716  setNumPatches(1);
2717 
2718  CProxy_ComputePmeMgr::ckLocalBranch(
2719  CkpvAccess(BOCclass_group).computePmeMgr)->addCompute(this);
2720 
2721  SimParameters *simParams = Node::Object()->simParameters;
2722 
2723  qmForcesOn = simParams->qmForcesOn;
2724  offload = simParams->PMEOffload;
2725 
2726  numGridsMax = numGrids;
2727 
2728  myGrid.K1 = simParams->PMEGridSizeX;
2729  myGrid.K2 = simParams->PMEGridSizeY;
2730  myGrid.K3 = simParams->PMEGridSizeZ;
2731  myGrid.order = simParams->PMEInterpOrder;
2732  myGrid.dim2 = myGrid.K2;
2733  myGrid.dim3 = 2 * (myGrid.K3/2 + 1);
2734 
2735 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
2736  cuda_atoms_offset = 0;
2737  f_data_host = 0;
2738  f_data_dev = 0;
2739  if ( ! offload )
2740 #endif
2741  {
2742  for ( int g=0; g<numGrids; ++g ) myRealSpace[g] = new PmeRealSpace(myGrid);
2743  }
2744 
2745  atomsChanged = 0;
2746 
2747  qmLoclIndx = 0;
2748  qmLocalCharges = 0;
2749 }
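The grid dimensions set in the constructor above follow the usual padding rule for an in-place real-to-complex FFT: K3 real values along the last axis transform into K3/2 + 1 complex values, so dim3 = 2*(K3/2 + 1) floats are reserved per column while dim2 needs no padding. A minimal standalone sketch of that sizing rule (struct and function names here are illustrative, not NAMD API):

    #include <cstdio>

    // Illustrative stand-in for the PmeGrid fields used above.
    struct GridDims { int K1, K2, K3, dim2, dim3; };

    GridDims sizeGrid(int K1, int K2, int K3) {
        GridDims g{K1, K2, K3, 0, 0};
        g.dim2 = g.K2;                 // middle dimension is stored unpadded
        g.dim3 = 2 * (g.K3 / 2 + 1);   // room for the K3/2+1 complex values of an r2c FFT
        return g;
    }

    int main() {
        GridDims g = sizeGrid(64, 64, 60);
        std::printf("K3 = %d padded to dim3 = %d\n", g.K3, g.dim3);  // prints 60 padded to 62
        return 0;
    }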

◆ ~ComputePme()

ComputePme::~ComputePme ( )
virtual

Definition at line 2985 of file ComputePme.C.

2986 {
2987 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
2988  if ( ! offload )
2989 #endif
2990  {
2991  for ( int g=0; g<numGridsMax; ++g ) delete myRealSpace[g];
2992  }
2993 }

Member Function Documentation

◆ atomUpdate()

void ComputePme::atomUpdate ( void  )
virtual

Reimplemented from Compute.

Definition at line 2710 of file ComputePme.C.

2710 { atomsChanged = 1; }

◆ doQMWork()

void ComputePme::doQMWork ( )

Definition at line 3078 of file ComputePme.C.

References doWork(), Molecule::get_numQMAtoms(), Molecule::get_qmAtmChrg(), Molecule::get_qmAtmIndx(), Molecule::get_qmAtomGroup(), Patch::getCompAtomExtInfo(), Patch::getNumAtoms(), Node::molecule, and Node::Object().

3078  {
3079 
3080 // iout << CkMyPe() << ") ----> PME doQMWork.\n" << endi ;
3081 
3082 
3083  int numQMAtms = Node::Object()->molecule->get_numQMAtoms();
3084  const Real *qmAtmChrg = Node::Object()->molecule->get_qmAtmChrg() ;
3085  const int *qmAtmIndx = Node::Object()->molecule->get_qmAtmIndx() ;
3086  const Real *qmAtomGroup = Node::Object()->molecule->get_qmAtomGroup() ;
3087 
3088  const CompAtomExt *xExt = patch->getCompAtomExtInfo();
3089 
3090  // Determine number of qm atoms in this patch for the current step.
3091  numLocalQMAtoms = 0;
3092  for (int paIter=0; paIter<patch->getNumAtoms(); paIter++) {
3093  if ( qmAtomGroup[xExt[paIter].id] != 0 ) {
3094  numLocalQMAtoms++;
3095  }
3096  }
3097 
3098  // We prepare a charge vector with QM charges for use in the PME calculation.
3099 
3100  // Clears data from last step, if there is any.
3101  if (qmLoclIndx != 0)
3102  delete [] qmLoclIndx;
3103  if (qmLocalCharges != 0)
3104  delete [] qmLocalCharges;
3105 
3106  qmLoclIndx = new int[numLocalQMAtoms] ;
3107  qmLocalCharges = new Real[numLocalQMAtoms] ;
3108 
3109  // I am assuming there will be (in general) more QM atoms among all QM groups
3110  // than MM atoms in a patch.
3111  int procAtms = 0;
3112 
3113  for (int paIter=0; paIter<patch->getNumAtoms(); paIter++) {
3114 
3115  for (int i=0; i<numQMAtms; i++) {
3116 
3117  if (qmAtmIndx[i] == xExt[paIter].id) {
3118 
3119  qmLoclIndx[procAtms] = paIter ;
3120  qmLocalCharges[procAtms] = qmAtmChrg[i];
3121 
3122  procAtms++;
3123  break;
3124  }
3125 
3126  }
3127 
3128  if (procAtms == numLocalQMAtoms)
3129  break;
3130  }
3131 
3132  doWork();
3133  return ;
3134 }
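doQMWork() above builds two parallel arrays, qmLoclIndx (patch-local atom index) and qmLocalCharges (the charge to substitute), which doWork() later applies as localData[qmLoclIndx[i]].cg = coulomb_sqrt * qmLocalCharges[i]. A hedged sketch of that build-once, apply-per-step pattern, with std::vector standing in for the raw new[] arrays and all names illustrative:

    #include <cstddef>
    #include <vector>

    struct Particle { double x, y, z, cg; };   // mirrors the PmeParticle layout

    // Record, for every patch atom whose global id appears in the QM index list,
    // its local index and the charge that should replace the zeroed-out one.
    void buildQMOverrides(const int *patchGlobalId, int numPatchAtoms,
                          const int *qmAtmIndx, const double *qmAtmChrg, int numQMAtms,
                          std::vector<int> &locIdx, std::vector<double> &locChg) {
        locIdx.clear(); locChg.clear();
        for (int a = 0; a < numPatchAtoms; ++a) {
            for (int i = 0; i < numQMAtms; ++i) {
                if (qmAtmIndx[i] == patchGlobalId[a]) {
                    locIdx.push_back(a);
                    locChg.push_back(qmAtmChrg[i]);
                    break;
                }
            }
        }
    }

    // Each step, overwrite the (scaled) charges in the per-patch particle buffer.
    void applyQMOverrides(Particle *localData, const std::vector<int> &locIdx,
                          const std::vector<double> &locChg, double coulomb_sqrt) {
        for (std::size_t i = 0; i < locIdx.size(); ++i)
            localData[locIdx[i]].cg = coulomb_sqrt * locChg[i];
    }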

◆ doWork()

void ComputePme::doWork ( void  )
virtual

Reimplemented from Compute.

Definition at line 3136 of file ComputePme.C.

References ComputePmeMgr::a_data_dev, ComputePmeMgr::a_data_host, ComputePmeUtil::alchDecouple, ComputePmeUtil::alchOn, Compute::basePriority, ResizeArray< Elem >::begin(), PmeParticle::cg, CompAtom::charge, ComputePmeMgr::chargeGridReady(), Compute::cid, Box< Owner, Data >::close(), COMPUTE_HOME_PRIORITY, COULOMB, ComputePmeMgr::cuda_atoms_alloc, ComputePmeMgr::cuda_atoms_count, ComputePmeMgr::cuda_busy, cuda_errcheck(), ComputePmeMgr::cuda_lock, ComputePmeMgr::cuda_submit_charges(), ComputePmeMgr::cuda_submit_charges_deque, DebugM, deviceCUDA, ComputeNonbondedUtil::dielectric_1, CompAtomExt::dispcoef, Flags::doMolly, ComputeNonbondedUtil::ewaldcof, PmeRealSpace::fill_charges(), Patch::flags, Patch::getCompAtomExtInfo(), DeviceCUDA::getDeviceID(), DeviceCUDA::getMasterPe(), Patch::getNumAtoms(), PmeGrid::K1, PmeGrid::K2, PmeGrid::K3, Flags::lattice, ComputePmeMgr::cuda_submit_charges_args::lattice, ComputePmeUtil::lesOn, ComputeNonbondedUtil::LJewaldcof, ComputePmeUtil::LJPMEOn, ComputePmeMgr::cuda_submit_charges_args::mgr, NAMD_bug(), ComputePmeUtil::numGrids, Box< Owner, Data >::open(), PmeGrid::order, ComputePmeUtil::pairOn, CompAtom::partition, PATCH_PRIORITY, PME_OFFLOAD_PRIORITY, PME_PRIORITY, ComputePmeMgr::pmeComputes, CompAtom::position, ResizeArray< Elem >::resize(), scale_coordinates(), ComputeNonbondedUtil::scaling, ComputePmeUtil::selfOn, Compute::sequence(), ComputePmeMgr::cuda_submit_charges_args::sequence, PmeRealSpace::set_num_atoms(), ResizeArray< Elem >::size(), SQRT_PI, ComputePmeMgr::submitReductions(), TRACE_COMPOBJ_IDOFFSET, ungridForces(), PmeParticle::x, Vector::x, PmeParticle::y, Vector::y, PmeParticle::z, and Vector::z.

Referenced by doQMWork().

3137 {
3138  DebugM(4,"Entering ComputePme::doWork().\n");
3139 
3140  if ( basePriority >= COMPUTE_HOME_PRIORITY ) {
3141 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
3142  basePriority = ( offload ? PME_OFFLOAD_PRIORITY : PME_PRIORITY );
3143 #else
3144  basePriority = PME_PRIORITY;
3145 #endif
3146  ungridForces();
3147  // CkPrintf("doWork 2 pe %d %d %d\n", CkMyPe(), myMgr->ungridForcesCount, myMgr->recipEvirCount);
3148  if ( ! --(myMgr->ungridForcesCount) && ! myMgr->recipEvirCount ) myMgr->submitReductions();
3149  return;
3150  }
3151  basePriority = COMPUTE_HOME_PRIORITY + PATCH_PRIORITY(patchID);
3152  // CkPrintf("doWork 1 pe %d %d %d\n", CkMyPe(), myMgr->ungridForcesCount, myMgr->recipEvirCount);
3153 
3154 #ifdef TRACE_COMPUTE_OBJECTS
3155  double traceObjStartTime = CmiWallTimer();
3156 #endif
3157 
3158 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
3159  if ( offload ) cudaSetDevice(deviceCUDA->getDeviceID());
3160 #endif
3161 
3162  // allocate storage
3163  numLocalAtoms = patch->getNumAtoms();
3164 
3165  Lattice &lattice = patch->flags.lattice;
3166 
3167  // For more than one grid, allocate base grid plus numGrids extra storage.
3168  // Each storage segment can hold the max atoms possible, numLocalAtoms.
3169  // Copy coordinate data from position box into base grid plus create
3170  // auxiliary array with partition number. For alchemy, copy coordinate data
3171  // from base grid into each extra buffer depending on partition values.
3172  // Storage is all PmeParticle: x,y,z,q double precision.
3173  int extraGrids = 0;
3174  if ( ! LJPMEOn && (numGrids > 1 || selfOn) ) {
3175  extraGrids = 1;
3176  }
3177 
3178  localData_alloc.resize(numLocalAtoms*(numGrids+extraGrids));
3179  localData = localData_alloc.begin();
3180  localPartition_alloc.resize(numLocalAtoms);
3181  localPartition = localPartition_alloc.begin();
3182 
3183  // We have local buffers: base, 0, 1, ..., numGrids-1 (for numGrids > 1).
3184  // localGridData points to the "0, 1, ..., numGrids-1" buffers.
3185  int g;
3186  for ( g=0; g<numGrids; ++g ) {
3187  localGridData[g] = localData + numLocalAtoms*(g+extraGrids);
3188  }
3189 
3190  // get positions and charges
3191  PmeParticle * data_ptr = localData;
3192  unsigned char * part_ptr = localPartition;
3193  const BigReal coulomb_sqrt = sqrt( COULOMB * ComputeNonbondedUtil::scaling
3194  * ComputeNonbondedUtil::dielectric_1 );
3195  {
3196  CompAtom *x = positionBox->open();
3197  // CompAtomExt *xExt = patch->getCompAtomExtInfo();
3198  if ( patch->flags.doMolly ) {
3199  positionBox->close(&x);
3200  x = avgPositionBox->open();
3201  }
3202  int numAtoms = patch->getNumAtoms();
3203 
3204  for(int i=0; i<numAtoms; ++i)
3205  {
3206  data_ptr->x = x[i].position.x;
3207  data_ptr->y = x[i].position.y;
3208  data_ptr->z = x[i].position.z;
3209  data_ptr->cg = coulomb_sqrt * x[i].charge;
3210  ++data_ptr;
3211  *part_ptr = x[i].partition;
3212  ++part_ptr;
3213  }
3214 
3215  // QM loop to overwrite charges of QM atoms.
3216  // They are zero for NAMD, but are updated in ComputeQM.
3217  if ( qmForcesOn ) {
3218 
3219  for(int i=0; i<numLocalQMAtoms; ++i)
3220  {
3221  localData[qmLoclIndx[i]].cg = coulomb_sqrt * qmLocalCharges[i];
3222  }
3223 
3224  }
3225 
3226  if ( patch->flags.doMolly ) { avgPositionBox->close(&x); }
3227  else { positionBox->close(&x); }
3228  }
3229 
3230  // copy to other grids if needed
3231  if ( (alchOn && (!alchDecouple)) || lesOn ) {
3232  for ( g=0; g<numGrids; ++g ) {
3233  PmeParticle *lgd = localGridData[g];
3234  if (g < 2) {
3235  int nga = 0;
3236  for(int i=0; i<numLocalAtoms; ++i) {
3237  if ( localPartition[i] == 0 || localPartition[i] == (g+1) || localPartition[i] == (g+3)) {
3238  // for FEP/TI: grid 0 gets non-alch + partition 1 + partition 3;
3239  // grid 1 gets non-alch + partition 2 + partition 4;
3240  lgd[nga++] = localData[i];
3241  }
3242  }
3243  numGridAtoms[g] = nga;
3244  } else {
3245  int nga = 0;
3246  for(int i=0; i<numLocalAtoms; ++i) {
3247  if ( localPartition[i] == 0 ) {
3248  // grid 2 (only if called for with numGrids=3) gets only non-alch
3249  lgd[nga++] = localData[i];
3250  }
3251  }
3252  numGridAtoms[g] = nga;
3253  }
3254  }
3255  } else if ( alchOn && alchDecouple) {
3256  // alchemical decoupling: four grids
3257  // g=0: partition 0 and partition 1
3258  // g=1: partition 0 and partition 2
3259  // g=2: only partition 1 atoms
3260  // g=3: only partition 2 atoms
3261  // plus one grid g=4, only partition 0, if numGrids=5
3262  for ( g=0; g<2; ++g ) { // same as before for first 2
3263  PmeParticle *lgd = localGridData[g];
3264  int nga = 0;
3265  for(int i=0; i<numLocalAtoms; ++i) {
3266  if ( localPartition[i] == 0 || localPartition[i] == (g+1) ) {
3267  lgd[nga++] = localData[i];
3268  }
3269  }
3270  numGridAtoms[g] = nga;
3271  }
3272  for (g=2 ; g<4 ; ++g ) { // only alchemical atoms for these 2
3273  PmeParticle *lgd = localGridData[g];
3274  int nga = 0;
3275  for(int i=0; i<numLocalAtoms; ++i) {
3276  if ( localPartition[i] == (g-1) ) {
3277  lgd[nga++] = localData[i];
3278  }
3279  }
3280  numGridAtoms[g] = nga;
3281  }
3282  for (g=4 ; g<numGrids ; ++g ) { // only non-alchemical atoms
3283  // numGrids=5 only if alchElecLambdaStart > 0
3284  PmeParticle *lgd = localGridData[g];
3285  int nga = 0;
3286  for(int i=0; i<numLocalAtoms; ++i) {
3287  if ( localPartition[i] == 0 ) {
3288  lgd[nga++] = localData[i];
3289  }
3290  }
3291  numGridAtoms[g] = nga;
3292  }
3293  } else if ( selfOn ) {
3294  if ( numGrids != 1 ) NAMD_bug("ComputePme::doWork assertion 1 failed");
3295  g = 0;
3296  PmeParticle *lgd = localGridData[g];
3297  int nga = 0;
3298  for(int i=0; i<numLocalAtoms; ++i) {
3299  if ( localPartition[i] == 1 ) {
3300  lgd[nga++] = localData[i];
3301  }
3302  }
3303  numGridAtoms[g] = nga;
3304  } else if ( pairOn ) {
3305  if ( numGrids != 3 ) NAMD_bug("ComputePme::doWork assertion 2 failed");
3306  g = 0;
3307  PmeParticle *lgd = localGridData[g];
3308  int nga = 0;
3309  for(int i=0; i<numLocalAtoms; ++i) {
3310  if ( localPartition[i] == 1 || localPartition[i] == 2 ) {
3311  lgd[nga++] = localData[i];
3312  }
3313  }
3314  numGridAtoms[g] = nga;
3315  for ( g=1; g<3; ++g ) {
3316  PmeParticle *lgd = localGridData[g];
3317  int nga = 0;
3318  for(int i=0; i<numLocalAtoms; ++i) {
3319  if ( localPartition[i] == g ) {
3320  lgd[nga++] = localData[i];
3321  }
3322  }
3323  numGridAtoms[g] = nga;
3324  }
3325  } else if ( LJPMEOn ) {
3326  const CompAtomExt *xExt = patch->getCompAtomExtInfo(); // for dispersion coef
3327  if ( numGrids != 2 ) NAMD_bug("ComputePme::doWork assertion for LJ-PME failed");
3328  // Reset localGridData pointers and set atom counts
3329  // localGridData[1] = localGridData[0];
3330  // localGridData[0] = localData;
3331  numGridAtoms[0] = numLocalAtoms;
3332  numGridAtoms[1] = numLocalAtoms;
3333  PmeParticle *lgd = localGridData[1];
3334  for (int i=0; i < numLocalAtoms; ++i) {
3335  lgd[i].x = localData[i].x;
3336  lgd[i].y = localData[i].y;
3337  lgd[i].z = localData[i].z;
3338  lgd[i].cg = xExt[i].dispcoef; // no scaling needed for dispersion
3339  }
3340  } else {
3341  // This else handles the numGrids==1 case.
3342  // In this case, localGridData[0] and numGridAtoms[0] aren't set to
3343  // usable values, so we reset them to point to the base buffer.
3344  // Expect the calculation to be done on localGridData[0..numGrids],
3345  // each buffer containing numGridAtoms[0..numGrids].
3346  if ( numGrids != 1 ) NAMD_bug("ComputePme::doWork assertion 3 failed");
3347  // localGridData[0] = localData;
3348  numGridAtoms[0] = numLocalAtoms;
3349  }
3350 
3351  if ( ! myMgr->doWorkCount ) {
3352  myMgr->doWorkCount = myMgr->pmeComputes.size();
3353 
3354 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
3355  if ( ! offload )
3356 #endif // NAMD_CUDA
3357  {
3358  memset( (void*) myMgr->fz_arr, 0, (myGrid.K3+myGrid.order-1) * sizeof(char) );
3359 
3360  for (int i=0; i<myMgr->q_count; ++i) {
3361  memset( (void*) (myMgr->q_list[i]), 0, (myGrid.K3+myGrid.order-1) * sizeof(float) );
3362  }
3363  }
3364 
3365  for ( g=0; g<numGrids; ++g ) {
3366  myMgr->evir[g] = 0;
3367  }
3368 
3369  myMgr->strayChargeErrors = 0;
3370 
3371  myMgr->compute_sequence = sequence();
3372  }
3373 
3374  if ( sequence() != myMgr->compute_sequence ) NAMD_bug("ComputePme sequence mismatch in doWork()");
3375 
3376  int strayChargeErrors = 0;
3377 
3378  // XXX need self energy for LJ-PME
3379  // calculate self energy
3380  const BigReal ewaldcof = ComputeNonbondedUtil::ewaldcof;
3381  for ( g=0; g<numGrids; ++g ) {
3382  BigReal selfEnergy = 0;
3383  data_ptr = localGridData[g];
3384  for (int i=0; i<numGridAtoms[g]; ++i) {
3385  selfEnergy += data_ptr->cg * data_ptr->cg;
3386  ++data_ptr;
3387  }
3388  if ( LJPMEOn && 1==g ) {
3389  const BigReal LJewaldcof = ComputeNonbondedUtil::LJewaldcof;
3390  double alpha6 = LJewaldcof * LJewaldcof * LJewaldcof;
3391  alpha6 = alpha6 * alpha6;
3392  selfEnergy *= (1./12.) * alpha6;
3393  } else {
3394  selfEnergy *= -1. * ewaldcof / SQRT_PI;
3395  }
3396  myMgr->evir[g][0] += selfEnergy;
3397 
3398  float **q = myMgr->q_arr + g*myMgr->fsize;
3399  char *f = myMgr->f_arr + g*myMgr->fsize;
3400 
3401  scale_coordinates(localGridData[g], numGridAtoms[g], lattice, myGrid);
3402 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
3403  if ( offload ) {
3404  if ( myMgr->cuda_atoms_alloc == 0 ) { // first call
3405  int na = myMgr->cuda_atoms_alloc = 1.2 * (myMgr->cuda_atoms_count + 1000);
3406  cuda_errcheck("before malloc atom data for pme");
3407  cudaMallocHost((void**) &(myMgr->a_data_host), 7*na*sizeof(float));
3408  cudaMalloc((void**) &(myMgr->a_data_dev), 7*na*sizeof(float));
3409  cuda_errcheck("malloc atom data for pme");
3410  myMgr->cuda_atoms_count = 0;
3411  }
3412  cuda_atoms_offset = myMgr->cuda_atoms_count;
3413  int n = numGridAtoms[g];
3414  myMgr->cuda_atoms_count += n;
3415  if ( myMgr->cuda_atoms_count > myMgr->cuda_atoms_alloc ) {
3416  CkPrintf("Pe %d expanding CUDA PME atoms allocation because %d > %d\n",
3417  CkMyPe(), myMgr->cuda_atoms_count, myMgr->cuda_atoms_alloc);
3418  cuda_errcheck("before malloc expanded atom data for pme");
3419  int na = myMgr->cuda_atoms_alloc = 1.2 * (myMgr->cuda_atoms_count + 1000);
3420  const float *a_data_host_old = myMgr->a_data_host;
3421  cudaMallocHost((void**) &(myMgr->a_data_host), 7*na*sizeof(float));
3422  cuda_errcheck("malloc expanded host atom data for pme");
3423  memcpy(myMgr->a_data_host, a_data_host_old, 7*cuda_atoms_offset*sizeof(float));
3424  cudaFreeHost((void*) a_data_host_old);
3425  cuda_errcheck("free expanded host atom data for pme");
3426  cudaFree(myMgr->a_data_dev);
3427  cuda_errcheck("free expanded dev atom data for pme");
3428  cudaMalloc((void**) &(myMgr->a_data_dev), 7*na*sizeof(float));
3429  cuda_errcheck("malloc expanded dev atom data for pme");
3430  }
3431  float *a_data_host = myMgr->a_data_host + 7 * cuda_atoms_offset;
3432  data_ptr = localGridData[g];
3433  double order_1 = myGrid.order - 1;
3434  double K1 = myGrid.K1;
3435  double K2 = myGrid.K2;
3436  double K3 = myGrid.K3;
3437  int found_negative = 0;
3438  for ( int i=0; i<n; ++i ) {
3439  if ( data_ptr[i].x < 0 || data_ptr[i].y < 0 || data_ptr[i].z < 0 ) {
3440  found_negative = 1;
3441  // CkPrintf("low coord: %f %f %f\n", data_ptr[i].x, data_ptr[i].y, data_ptr[i].z);
3442  }
3443  double x_int = (int) data_ptr[i].x;
3444  double y_int = (int) data_ptr[i].y;
3445  double z_int = (int) data_ptr[i].z;
3446  a_data_host[7*i ] = data_ptr[i].x - x_int; // subtract in double precision
3447  a_data_host[7*i+1] = data_ptr[i].y - y_int;
3448  a_data_host[7*i+2] = data_ptr[i].z - z_int;
3449  a_data_host[7*i+3] = data_ptr[i].cg;
3450  x_int -= order_1; if ( x_int < 0 ) x_int += K1;
3451  y_int -= order_1; if ( y_int < 0 ) y_int += K2;
3452  z_int -= order_1; if ( z_int < 0 ) z_int += K3;
3453  a_data_host[7*i+4] = x_int;
3454  a_data_host[7*i+5] = y_int;
3455  a_data_host[7*i+6] = z_int;
3456  }
3457  if ( found_negative ) NAMD_bug("found negative atom coordinate in ComputePme::doWork");
3458  } else
3459 #endif // NAMD_CUDA
3460  {
3461  myRealSpace[g]->set_num_atoms(numGridAtoms[g]);
3462  myRealSpace[g]->fill_charges(q, myMgr->q_list, myMgr->q_count, strayChargeErrors, f, myMgr->fz_arr, localGridData[g]);
3463  }
3464  }
3465  myMgr->strayChargeErrors += strayChargeErrors;
3466 
3467 #ifdef TRACE_COMPUTE_OBJECTS
3468  traceUserBracketEvent(TRACE_COMPOBJ_IDOFFSET+this->cid, traceObjStartTime, CmiWallTimer());
3469 #endif
3470 
3471  if ( --(myMgr->doWorkCount) == 0 ) {
3472 // cudaDeviceSynchronize(); // XXXX
3473 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
3474  if ( offload ) {
3475  ComputePmeMgr::cuda_submit_charges_args args;
3476  args.mgr = myMgr;
3477  args.lattice = &lattice;
3478  args.sequence = sequence();
3479  CmiLock(ComputePmeMgr::cuda_lock);
3480  if ( ComputePmeMgr::cuda_busy ) {
3481  ComputePmeMgr::cuda_submit_charges_deque.push_back(args);
3482  } else if ( CkMyPe() == deviceCUDA->getMasterPe() ) {
3483  // avoid adding work to nonbonded data preparation pe
3484  args.mgr->cuda_submit_charges(*args.lattice, args.sequence);
3485  } else {
3486  ComputePmeMgr::cuda_busy = true;
3487  while ( 1 ) {
3488  CmiUnlock(ComputePmeMgr::cuda_lock);
3489  args.mgr->cuda_submit_charges(*args.lattice, args.sequence);
3490  CmiLock(ComputePmeMgr::cuda_lock);
3491  if ( ComputePmeMgr::cuda_submit_charges_deque.size() ) {
3492  args = ComputePmeMgr::cuda_submit_charges_deque.front();
3493  ComputePmeMgr::cuda_submit_charges_deque.pop_front();
3494  } else {
3495  ComputePmeMgr::cuda_busy = false;
3496  break;
3497  }
3498  }
3499  }
3500  CmiUnlock(ComputePmeMgr::cuda_lock);
3501  } else
3502 #endif // NAMD_CUDA
3503  {
3504  myMgr->chargeGridReady(lattice,sequence());
3505  }
3506  }
3507  atomsChanged = 0;
3508 }
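The per-grid self-energy loop above applies the standard Ewald self-interaction correction. With beta the Ewald coefficient (ewaldcof), beta_LJ the LJ-PME coefficient (LJewaldcof), q_i the charges already scaled by sqrt(COULOMB * scaling * dielectric_1), and c_i the per-atom dispersion coefficients loaded into the cg field of grid 1, the accumulated terms are (a hedged restatement of the code, not quoted from NAMD documentation):

    E_{self}^{elec} = -\frac{\beta}{\sqrt{\pi}} \sum_i q_i^2 ,
    \qquad
    E_{self}^{LJ} = \frac{\beta_{LJ}^{6}}{12} \sum_i c_i^2 .

Each result is added into evir[g][0] so it is folded into the reciprocal-space energy reduction.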
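When offload is enabled, the end of doWork() funnels GPU charge submissions through ComputePmeMgr::cuda_lock: a PE either appends its arguments to cuda_submit_charges_deque (another PE is already submitting) or marks cuda_busy and drains the deque itself, dropping the lock around each cuda_submit_charges() call. A compact sketch of that drain-or-enqueue pattern, using std::mutex and std::deque in place of the Charm++ CmiLock and the static manager members (every name below is illustrative):

    #include <deque>
    #include <mutex>

    struct SubmitArgs { int sequence; /* plus whatever the submission needs */ };

    std::mutex submitLock;               // stands in for ComputePmeMgr::cuda_lock
    std::deque<SubmitArgs> submitQueue;  // stands in for cuda_submit_charges_deque
    bool submitterBusy = false;          // stands in for cuda_busy

    void submitToDevice(const SubmitArgs &a) { (void)a; /* launch the GPU work here */ }

    void drainOrEnqueue(SubmitArgs args) {
        std::unique_lock<std::mutex> lk(submitLock);
        if (submitterBusy) {             // someone else is draining: hand the work off
            submitQueue.push_back(args);
            return;
        }
        submitterBusy = true;
        while (true) {
            lk.unlock();                 // never hold the lock across a submission
            submitToDevice(args);
            lk.lock();
            if (submitQueue.empty()) { submitterBusy = false; break; }
            args = submitQueue.front();
            submitQueue.pop_front();
        }
    }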

◆ initialize()

void ComputePme::initialize ( void  )
virtual

Reimplemented from Compute.

Definition at line 2751 of file ComputePme.C.

References ComputePmeMgr::cuda_atoms_count, Patch::getNumAtoms(), NAMD_bug(), PatchMap::Object(), Patch::registerAvgPositionPickup(), Patch::registerForceDeposit(), and Patch::registerPositionPickup().

2751  {
2752  if (!(patch = PatchMap::Object()->patch(patchID))) {
2753  NAMD_bug("ComputePme used with unknown patch.");
2754  }
2755  positionBox = patch->registerPositionPickup(this);
2756  avgPositionBox = patch->registerAvgPositionPickup(this);
2757  forceBox = patch->registerForceDeposit(this);
2758 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
2759  if ( offload ) {
2760  myMgr->cuda_atoms_count += patch->getNumAtoms();
2761  }
2762 #endif
2763 }

◆ noWork()

int ComputePme::noWork ( )
virtual

Reimplemented from Compute.

Definition at line 3039 of file ComputePme.C.

References ResizeArray< Elem >::add(), Flags::doFullElectrostatics, Patch::flags, ComputePmeMgr::pmeComputes, ResizeArray< Elem >::size(), Box< Owner, Data >::skip(), and SubmitReduction::submit().

3039  {
3040 
3041  if ( patch->flags.doFullElectrostatics ) {
3042  // In QM/MM simulations, atom charges from QM regions need special treatment.
3043  if ( qmForcesOn ) {
3044  return 1;
3045  }
3046  if ( ! myMgr->ungridForcesCount && ! myMgr->recipEvirCount ) return 0; // work to do, enqueue as usual
3047  myMgr->heldComputes.add(this);
3048  return 1; // don't enqueue yet
3049  }
3050 
3051  positionBox->skip();
3052  forceBox->skip();
3053 
3054  if ( ++(myMgr->noWorkCount) == myMgr->pmeComputes.size() ) {
3055  myMgr->noWorkCount = 0;
3056  myMgr->reduction->submit();
3057  }
3058 
3059  atomsChanged = 0;
3060 
3061  return 1; // no work for this step
3062 }
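noWork() above shows the per-step contract of the Box dependence objects registered in initialize(): each step a compute must either open and later close a registered box (as doWork() does with positionBox and forceBox) or call skip() when it contributes nothing, so the owning patch's counters stay balanced. A schematic sketch of that contract with a hypothetical SimpleBox type, not the actual Box<Owner,Data> template:

    // Hypothetical stand-in for Box<Owner,Data>: each registered user must
    // open()+close() or skip() exactly once per step.
    template <typename Data>
    class SimpleBox {
        Data *data_ = nullptr;
        int pendingUsers_ = 0;
    public:
        void deposit(Data *d, int users) { data_ = d; pendingUsers_ = users; }
        Data *open() { return data_; }                            // begin using the data
        void close(Data **d) { *d = nullptr; --pendingUsers_; }   // done with it this step
        void skip() { --pendingUsers_; }                          // nothing to do this step
        bool stepComplete() const { return pendingUsers_ == 0; }
    };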

◆ setMgr()

void ComputePme::setMgr ( ComputePmeMgr * mgr )
inline

Definition at line 58 of file ComputePme.h.

58 { myMgr = mgr; }

◆ ungridForces()

void ComputePme::ungridForces ( )

Definition at line 4090 of file ComputePme.C.

References ADD_VECTOR_OBJECT, ComputePmeUtil::alchDecouple, ComputePmeUtil::alchFepOn, ComputePmeUtil::alchOn, ResizeArray< Elem >::begin(), Box< Owner, Data >::close(), PmeRealSpace::compute_forces(), endi(), Results::f, Patch::flags, Patch::getNumAtoms(), iERROR(), iout, Flags::lattice, ComputePmeUtil::lesFactor, ComputePmeUtil::lesOn, ComputePmeUtil::LJPMEOn, NAMD_bug(), ComputePmeUtil::numGrids, Node::Object(), Box< Owner, Data >::open(), ComputePmeUtil::pairOn, ResizeArray< Elem >::resize(), scale_forces(), ComputePmeUtil::selfOn, Compute::sequence(), Node::simParameters, simParams, Results::slow, Flags::step, Vector::x, Vector::y, and Vector::z.

Referenced by doWork().

4090  {
4091 
4092  if ( sequence() != myMgr->compute_sequence ) NAMD_bug("ComputePme sequence mismatch in ungridForces()");
4093 
4094  SimParameters *simParams = Node::Object()->simParameters;
4095 
4096  localResults_alloc.resize(numLocalAtoms* ((numGrids>1 || selfOn)?2:1));
4097  Vector *localResults = localResults_alloc.begin();
4098  Vector *gridResults;
4099 
4100  if ( alchOn || lesOn || selfOn || pairOn ) {
4101  for(int i=0; i<numLocalAtoms; ++i) { localResults[i] = 0.; }
4102  gridResults = localResults + numLocalAtoms;
4103  } else {
4104  gridResults = localResults;
4105  }
4106 
4107  Vector pairForce = 0.;
4108  Lattice &lattice = patch->flags.lattice;
4109  int g = 0;
4110  if(!simParams->commOnly) {
4111  for ( g=0; g<numGrids; ++g ) {
4112 #ifdef NETWORK_PROGRESS
4113  CmiNetworkProgress();
4114 #endif
4115 
4116 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
4117  if ( offload ) {
4118  int errfound = 0;
4119  for ( int n=numGridAtoms[g], i=0; i<n; ++i ) {
4120  // Neither isnan() nor x != x worked when testing on Cray; this does.
4121  if ( ((int*)f_data_host)[3*i] == 0x7fffffff ) { errfound = 1; } // CUDA NaN
4122  gridResults[i].x = f_data_host[3*i];
4123  gridResults[i].y = f_data_host[3*i+1];
4124  gridResults[i].z = f_data_host[3*i+2];
4125  }
4126  if ( errfound ) {
4127  int errcount = 0;
4128  for ( int n=numGridAtoms[g], i=0; i<n; ++i ) {
4129  float f = f_data_host[3*i];
4130  if ( ((int*)f_data_host)[3*i] == 0x7fffffff ) { // CUDA NaN
4131  ++errcount;
4132  gridResults[i] = 0.;
4133  }
4134  }
4135  iout << iERROR << "Stray PME grid charges detected: "
4136  << errcount << " atoms on pe " << CkMyPe() << "\n" << endi;
4137  }
4138  } else
4139 #endif // NAMD_CUDA
4140  {
4141  myRealSpace[g]->compute_forces(myMgr->q_arr+g*myMgr->fsize, localGridData[g], gridResults);
4142  }
4143  scale_forces(gridResults, numGridAtoms[g], lattice);
4144 
4145  if (LJPMEOn) {
4146  if (0==g) {
4147  // finished loop g==0, next loop gathers
4148  // LJ-PME force contributions into upper buffer
4149  gridResults += numLocalAtoms;
4150  } else {
4151  // sum LJ-PME forces into electrostatic forces buffer
4152  for (int i=0; i < numLocalAtoms; i++) {
4153  localResults[i] += gridResults[i];
4154  }
4155  }
4156  } else if (alchOn) {
4157  float scale = 1.;
4158  BigReal elecLambdaUp, elecLambdaDown;
4159  BigReal alchLambda = simParams->getCurrentLambda(patch->flags.step);
4160  myMgr->alchLambda = alchLambda;
4161  BigReal alchLambda2 = simParams->getCurrentLambda2(patch->flags.step);
4162  myMgr->alchLambda2 = alchLambda2;
4163  elecLambdaUp = simParams->getElecLambda(alchLambda);
4164  elecLambdaDown = simParams->getElecLambda(1. - alchLambda);
4165 
4166  if ( g == 0 ) scale = elecLambdaUp;
4167  else if ( g == 1 ) scale = elecLambdaDown;
4168  else if ( g == 2 ) scale = (elecLambdaUp + elecLambdaDown - 1)*(-1);
4169 
4170  if (alchDecouple) {
4171  if ( g == 2 ) scale = 1 - elecLambdaUp;
4172  else if ( g == 3 ) scale = 1 - elecLambdaDown;
4173  else if ( g == 4 ) scale = (elecLambdaUp + elecLambdaDown - 1)*(-1);
4174  }
4175  int nga = 0;
4176  if (!alchDecouple) {
4177  if (g < 2 ) {
4178  for(int i=0; i<numLocalAtoms; ++i) {
4179  if ( localPartition[i] == 0 || localPartition[i] == (g+1) || localPartition[i] == (g+3) ) {
4180  // (g=0: only partition 0 and partition 1 and partition 3)
4181  // (g=1: only partition 0 and partition 2 and partition 4)
4182  localResults[i] += gridResults[nga++] * scale;
4183  }
4184  }
4185  } else {
4186  for(int i=0; i<numLocalAtoms; ++i) {
4187  if ( localPartition[i] == 0 ) {
4188  // (g=2: only partition 0)
4189  localResults[i] += gridResults[nga++] * scale;
4190  }
4191  }
4192  }
4193  } else { // alchDecouple
4194  if ( g < 2 ) {
4195  for(int i=0; i<numLocalAtoms; ++i) {
4196  if ( localPartition[i] == 0 || localPartition[i] == (g+1) ) {
4197  // g = 0: partition 0 or partition 1
4198  // g = 1: partition 0 or partition 2
4199  localResults[i] += gridResults[nga++] * scale;
4200  }
4201  }
4202  }
4203  else {
4204  for(int i=0; i<numLocalAtoms; ++i) {
4205  if ( localPartition[i] == (g-1) || localPartition[i] == (g-4)) {
4206  // g = 2: partition 1 only
4207  // g = 3: partition 2 only
4208  // g = 4: partition 0 only
4209  localResults[i] += gridResults[nga++] * scale;
4210  }
4211  }
4212  }
4213  }
4214  } else if ( lesOn ) {
4215  float scale = 1.;
4216  if ( alchFepOn ) {
4217  BigReal alchLambda = simParams->getCurrentLambda(patch->flags.step);
4218  myMgr->alchLambda = alchLambda;
4219  BigReal alchLambda2 = simParams->getCurrentLambda2(patch->flags.step);
4220  myMgr->alchLambda2 = alchLambda2;
4221  if ( g == 0 ) scale = alchLambda;
4222  else if ( g == 1 ) scale = 1. - alchLambda;
4223  } else if ( lesOn ) {
4224  scale = 1.0 / (float)lesFactor;
4225  }
4226  int nga = 0;
4227  for(int i=0; i<numLocalAtoms; ++i) {
4228  if ( localPartition[i] == 0 || localPartition[i] == (g+1) ) {
4229  localResults[i] += gridResults[nga++] * scale;
4230  }
4231  }
4232  } else if ( selfOn ) {
4233  PmeParticle *lgd = localGridData[g];
4234  int nga = 0;
4235  for(int i=0; i<numLocalAtoms; ++i) {
4236  if ( localPartition[i] == 1 ) {
4237  pairForce += gridResults[nga]; // should add up to almost zero
4238  localResults[i] += gridResults[nga++];
4239  }
4240  }
4241  } else if ( pairOn ) {
4242  if ( g == 0 ) {
4243  int nga = 0;
4244  for(int i=0; i<numLocalAtoms; ++i) {
4245  if ( localPartition[i] == 1 ) {
4246  pairForce += gridResults[nga];
4247  }
4248  if ( localPartition[i] == 1 || localPartition[i] == 2 ) {
4249  localResults[i] += gridResults[nga++];
4250  }
4251  }
4252  } else if ( g == 1 ) {
4253  int nga = 0;
4254  for(int i=0; i<numLocalAtoms; ++i) {
4255  if ( localPartition[i] == g ) {
4256  pairForce -= gridResults[nga]; // should add up to almost zero
4257  localResults[i] -= gridResults[nga++];
4258  }
4259  }
4260  } else {
4261  int nga = 0;
4262  for(int i=0; i<numLocalAtoms; ++i) {
4263  if ( localPartition[i] == g ) {
4264  localResults[i] -= gridResults[nga++];
4265  }
4266  }
4267  }
4268  }
4269  }
4270  }
4271 
4272  Vector *results_ptr = localResults;
4273 
4274  // add in forces
4275  {
4276  Results *r = forceBox->open();
4277  Force *f = r->f[Results::slow];
4278  int numAtoms = patch->getNumAtoms();
4279 
4280  if ( ! myMgr->strayChargeErrors && ! simParams->commOnly ) {
4281  for(int i=0; i<numAtoms; ++i) {
4282  f[i].x += results_ptr->x;
4283  f[i].y += results_ptr->y;
4284  f[i].z += results_ptr->z;
4285  ++results_ptr;
4286  }
4287  }
4288  forceBox->close(&r);
4289  }
4290 
4291  if ( pairOn || selfOn ) {
4292  ADD_VECTOR_OBJECT(myMgr->reduction,REDUCTION_PAIR_ELECT_FORCE,pairForce);
4293  }
4294 
4295 }
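In the alchemical branch above each grid's forces are accumulated into localResults with a lambda-dependent weight before being deposited into the patch's slow forces. Writing \lambda_{up} = getElecLambda(alchLambda) and \lambda_{down} = getElecLambda(1 - alchLambda), the weights applied per grid g are (a hedged summary of the listing, following the grid layout described in doWork()):

    s_0 = \lambda_{up}, \qquad s_1 = \lambda_{down}, \qquad s_2 = -(\lambda_{up} + \lambda_{down} - 1)

and with alchDecouple the decoupled grids instead use

    s_2 = 1 - \lambda_{up}, \qquad s_3 = 1 - \lambda_{down}, \qquad s_4 = -(\lambda_{up} + \lambda_{down} - 1),

so the total slow force on atom i is the sum of s_g F_i^{(g)} over the grids that contain atom i.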

Friends And Related Function Documentation

◆ ComputePmeMgr

friend class ComputePmeMgr
friend

Definition at line 60 of file ComputePme.h.


The documentation for this class was generated from the following files: ComputePme.h and ComputePme.C.