#ifndef COMPUTEPMECUDAMGR_H
#define COMPUTEPMECUDAMGR_H

#include <vector>   // std::vector
#include <array>    // std::array
#include <cstring>  // memcpy

#ifdef NAMD_CUDA
#include <cuda_runtime.h>
#endif
#ifdef NAMD_HIP
#include <hip/hip_runtime.h>
#endif

#include "PmeBase.h"
#include "PmeSolverUtil.h"
#include "ComputePmeCUDAMgr.decl.h"

#if defined(NAMD_CUDA) || defined(NAMD_HIP)

//
// Thread-safe atom storage for a single PME pencil. Atoms from multiple home
// patches are appended into one contiguous array, optionally together with a
// per-atom index used to gather/scatter between patch order and pencil order.
//
class PmeAtomStorage {
public:
  PmeAtomStorage(const bool useIndex) : useIndex(useIndex) {
    numAtoms = 0;
    atomCapacity = 0;
    atom = NULL;
    atomIndexCapacity = 0;
    atomIndex = NULL;
    overflowStart = 0;
    overflowEnd = 0;
    overflowAtomCapacity = 0;
    overflowAtom = NULL;
    overflowAtomIndexCapacity = 0;
    overflowAtomIndex = NULL;
    lock_ = CmiCreateLock();
  }

  virtual ~PmeAtomStorage() {
    CmiDestroyLock(lock_);
  }
  // Append natom atoms in their given order. Returns the patch index.
  int addAtoms(const int natom, const CudaAtom* src,
      const std::vector<float*>& lambdaArrays) {
    return addAtoms_(natom, src, NULL, lambdaArrays);
  }

  // Append natom atoms through an index array, element i being read from
  // src[index[i]]. Returns the patch index.
  int addAtomsWithIndex(const int natom, const CudaAtom* src, const int* index,
      const std::vector<float*>& lambdaArrays) {
    return addAtoms_(natom, src, index, lambdaArrays);
  }
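  //
  // Minimal usage sketch (illustrative only: "storage" stands for an
  // instance of a concrete subclass, and n0/n1 and the atom buffers are
  // hypothetical):
  //
  //   std::vector<float*> noLambda(NUM_GRID_MAX, (float*)NULL);
  //   int p0 = storage->addAtoms(n0, atoms0, noLambda);  // first patch
  //   int p1 = storage->addAtoms(n1, atoms1, noLambda);  // second patch
  //   storage->finish();  // merge any overflow into the main array
  //   // now getNumPatches() == 2 and getPatchPos() == { n0, n0+n1 }
  //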
  // Finish up: if any atoms landed in the overflow buffers because the main
  // arrays ran out of capacity, grow the main arrays and copy the overflow
  // contents back in. Must be called after the last patch has been added.
  void finish() {
    if (overflowEnd-overflowStart > 0) {
      resize_((void **)&atom, numAtoms, atomCapacity, sizeof(CudaAtom));
      if (useIndex) resize_((void **)&atomIndex, numAtoms, atomIndexCapacity, sizeof(int));
      memcpy_(atom+overflowStart, overflowAtom, (overflowEnd-overflowStart)*sizeof(CudaAtom));
      if (useIndex) memcpy_(atomIndex+overflowStart, overflowAtomIndex, (overflowEnd-overflowStart)*sizeof(int));
      overflowStart = 0;
      overflowEnd = 0;
    }
  }
  // Enable the per-grid alchemical (FEP/TI) factor arrays required by the
  // current simulation settings.
  void setupAlch(const SimParameters& simParams) {
    // ...
    if (bool(simParams.alchElecLambdaStart)) {
      // an extra grid is needed when the electrostatic lambda schedule
      // has a delayed start
    }
    // ...
  }
  // Cumulative patch positions: entry p is one past the last atom of patch p.
  int* getPatchPos() {
    return patchPos.data();
  }

  int getNumPatches() {
    return (int)patchPos.size();
  }
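  //
  // Callers recover per-patch atom ranges from the cumulative positions
  // (sketch; p is a hypothetical patch index):
  //
  //   int* pos  = storage->getPatchPos();
  //   int begin = (p == 0) ? 0 : pos[p-1];  // first atom of patch p
  //   int end   = pos[p];                   // one past its last atom
  //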
  // Index array for patch p; only valid when indexing was enabled.
  int* getAtomIndex(int p) {
    if (!useIndex)
      NAMD_bug("PmeAtomStorage::getAtomIndex, no indexing enabled");
    return (p == 0) ? atomIndex : atomIndex + patchPos[p-1];
  }
protected:
  // Overflow buffers: atoms that arrive after the main arrays are full wait
  // here until finish() merges them back.
  CudaAtom* overflowAtom;
  int overflowAtomCapacity;
  int* overflowAtomIndex;
  int overflowAtomIndexCapacity;
  int overflowStart;
  int overflowEnd;

  // Per-grid alchemical electrostatic factor arrays and their overflow twins
  std::vector<float*> atomElecFactorArrays;
  std::vector<float*> overflowAtomElecFactorArrays;
  std::vector<int> overflowAtomElecFactorCapacities;
  unsigned int totalFactorArrays;

private:
  // patchPos[p] = cumulative number of atoms through patch p
  std::vector<int> patchPos;

  // Main atom array, optional per-atom index array, and their capacities
  int numAtoms;
  CudaAtom* atom;
  int atomCapacity;
  int* atomIndex;
  int atomIndexCapacity;

  const bool useIndex;
  CmiNodeLock lock_;
  // Grow *array (capacity arrayCapacity, elements of size sizeofType) so it
  // can hold at least sizeRequested elements, preserving existing contents.
  void resize_(void **array, int sizeRequested, int& arrayCapacity, const size_t sizeofType) {
    // If the array already has enough capacity there is nothing to do
    if (*array != NULL && arrayCapacity >= sizeRequested) return;

    // Allocate with 1.5x headroom to amortize repeated growth
    int newArrayCapacity = (int)(sizeRequested*1.5);
    void* newArray = alloc_(sizeofType*newArrayCapacity);

    // Copy the old contents over and release the old array
    if (*array != NULL) {
      memcpy_(newArray, *array, arrayCapacity*sizeofType);
      dealloc_(*array);
    }

    *array = newArray;
    arrayCapacity = newArrayCapacity;
  }
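  //
  // Worked example of the 1.5x policy above: a first request for 100
  // elements allocates capacity 150; the next growth is triggered by a
  // request for 151 (new capacity 226), then 227 (capacity 340), and so on,
  // keeping the number of reallocations logarithmic in the final size.
  //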
  // Plain host-memory copy; subclasses can override, e.g. to copy into
  // page-locked (pinned) memory.
  virtual void memcpy_(void *dst, const void* src, const int size) {
    memcpy(dst, src, size);
  }
  // Gather copy: dst[i] = src[indexSrc[i]] for i in [0, natom)
  template <typename array_type>
  void copyWithIndex_(array_type* dst, const array_type* src, const int natom, const int* indexSrc) {
    for (int i=0;i < natom;i++) dst[i] = src[indexSrc[i]];
  }
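  //
  // Example with hypothetical values: src = {a, b, c, d} and
  // indexSrc = {3, 0, 2} give copyWithIndex_(dst, src, 3, indexSrc)
  // the result dst = {d, a, c}.
  //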
  // Allocation policy is left to subclasses (e.g. plain heap vs. page-locked
  // CUDA host memory).
  virtual void* alloc_(const size_t size)=0;
  virtual void dealloc_(void *p)=0;
  // Shared implementation behind addAtoms / addAtomsWithIndex.
  // Returns the index of the added patch; thread safe.
  int addAtoms_(const int natom, const CudaAtom* src, const int* index,
      const std::vector<float*>& lambdaArrays) {
    CmiLock(lock_);
    // Record the cumulative patch position: atoms of patch p occupy
    // [ patchPos[p-1], patchPos[p] ) of the final array.
    int patchInd = patchPos.size();
    int ppos = (patchInd == 0) ? natom : patchPos[patchInd-1] + natom;
    patchPos.push_back(ppos);
    int pos = numAtoms;
    bool overflow = false;
    numAtoms += natom;
    // If the main arrays would overflow, store into the overflow buffers
    // instead: the main arrays cannot be resized here because other threads
    // may be writing to them concurrently. finish() merges the overflow back.
    if (numAtoms > atomCapacity || (useIndex && numAtoms > atomIndexCapacity)) {
      if (overflowEnd-overflowStart == 0) {
        overflowStart = pos;
        overflowEnd = pos;
      }
      overflowEnd += natom;
      if (overflowEnd-overflowStart > overflowAtomCapacity) {
        resize_((void **)&overflowAtom, overflowEnd-overflowStart, overflowAtomCapacity, sizeof(CudaAtom));
      }
      if (useIndex && overflowEnd-overflowStart > overflowAtomIndexCapacity) {
        resize_((void **)&overflowAtomIndex, overflowEnd-overflowStart, overflowAtomIndexCapacity, sizeof(int));
      }
      for (unsigned int i=0;i < totalFactorArrays;i++) {
        if (lambdaArrays[i] != NULL && overflowEnd-overflowStart > overflowAtomElecFactorCapacities[i]) {
          resize_((void **)&overflowAtomElecFactorArrays[i], overflowEnd-overflowStart, overflowAtomElecFactorCapacities[i], sizeof(float));
        }
      }
      if (index != NULL) {
        if (useIndex) memcpy_(overflowAtomIndex+overflowEnd-overflowStart-natom, index, natom*sizeof(int));
        copyWithIndex_(overflowAtom+overflowEnd-overflowStart-natom, src, natom, index);
        for (unsigned int i=0;i < totalFactorArrays;i++) {
          if (lambdaArrays[i] != NULL) {
            copyWithIndex_(overflowAtomElecFactorArrays[i]+overflowEnd-overflowStart-natom, lambdaArrays[i], natom, index);
          }
        }
      } else {
        memcpy_(overflowAtom+overflowEnd-overflowStart-natom, src, natom*sizeof(CudaAtom));
        for (unsigned int i=0;i < totalFactorArrays;i++) {
          if (lambdaArrays[i] != NULL) {
            memcpy_(overflowAtomElecFactorArrays[i]+overflowEnd-overflowStart-natom, lambdaArrays[i], natom*sizeof(float));
          }
        }
      }
      overflow = true;
    } else {
      // Common case: store directly into the main arrays.
      if (index != NULL) {
        if (useIndex) memcpy_(atomIndex+pos, index, natom*sizeof(int));
        copyWithIndex_(atom+pos, src, natom, index);
        for (unsigned int i=0;i < totalFactorArrays;i++) {
          if (lambdaArrays[i] != NULL) {
            copyWithIndex_(atomElecFactorArrays[i]+pos, lambdaArrays[i], natom, index);
          }
        }
      } else {
        memcpy_(atom+pos, src, natom*sizeof(CudaAtom));
        for (unsigned int i=0;i < totalFactorArrays;i++) {
          if (lambdaArrays[i] != NULL) {
            memcpy_(atomElecFactorArrays[i]+pos, lambdaArrays[i], natom*sizeof(float));
          }
        }
      }
    }
    CmiUnlock(lock_);
    return patchInd;
  }
};
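//
// Sketch of a concrete storage class (illustrative; the actual subclasses
// live elsewhere and allocate page-locked CUDA host memory instead):
//
//   class PmeAtomStorageHost : public PmeAtomStorage {
//   public:
//     PmeAtomStorageHost(const bool useIndex) : PmeAtomStorage(useIndex) {}
//     ~PmeAtomStorageHost() {}
//   private:
//     void* alloc_(const size_t size) { return (void *)(new char[size]); }
//     void dealloc_(void *p) { delete [] (char *)p; }
//   };
//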
class CProxy_ComputePmeCUDADevice;

// Startup message carrying the device proxies to every node.
class RecvDeviceMsg : public CMessage_RecvDeviceMsg {
public:
  CProxy_ComputePmeCUDADevice* dev;
  int numDevicesMax;
};

// Files atoms into the pencils they belong to: the 3x3 (y,z) neighborhood
// of the home pencil plus one slot for atoms that stay put.
class PmeAtomFiler : public CBase_PmeAtomFiler {
public:
  PmeAtomFiler();
  PmeAtomFiler(CkMigrateMessage *);
  ~PmeAtomFiler();
  void fileAtoms(const int numAtoms, const CudaAtom* atoms,
    Lattice &lattice, const PmeGrid &pmeGrid,
    const int pencilIndexY, const int pencilIndexZ,
    const int ylo, const int yhi, const int zlo, const int zhi);
  int getNumAtoms(int p) {return pencilSize[p];}
  int* getAtomIndex(int p) {return pencil[p];}
private:
  // 9 neighboring pencils + 1 extra slot
  int pencilSize[9+1];
  int pencilCapacity[9+1];
  int* pencil[9+1];
};
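//
// Assumed slot layout (an inference from the 9+1 sizing, not spelled out in
// this header): slots 0..8 flatten the 3x3 (y,z) neighborhood, e.g.
// y-offset = slot % 3 and z-offset = (slot / 3) % 3, with slot 9 holding
// the atoms that remain in the home pencil.
//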
class CProxy_ComputePmeCUDAMgr;

// Manages the PME work of one GPU device: receives atoms from home patches,
// exchanges pencil atoms and forces with neighboring devices, and sends the
// merged forces back to the patches.
class ComputePmeCUDADevice : public CBase_ComputePmeCUDADevice {
public:
  ComputePmeCUDADevice();
  ComputePmeCUDADevice(CkMigrateMessage *m);
  ~ComputePmeCUDADevice();
  void initialize(PmeGrid& pmeGrid_in, int pencilIndexY_in, int pencilIndexZ_in,
    int deviceID_in, int pmePencilType_in, CProxy_ComputePmeCUDAMgr mgrProxy_in,
    CProxy_PmeAtomFiler pmeAtomFiler_in);
  void initializePatches(int numHomePatches_in);
  void recvAtoms(PmeAtomMsg *msg);
  void sendAtomsToNeighbors();
  void sendAtomsToNeighbor(int y, int z, int atomIval);
  void recvAtomsFromNeighbor(PmeAtomPencilMsg *msg);
  void registerRecvAtomsFromNeighbor();
  void gatherForceDone(unsigned int iGrid);
  void gatherForceDoneSubset(int first, int last);
  void sendForcesToNeighbors();
  void recvForcesFromNeighbor(PmeForcePencilMsg *msg);
  void mergeForcesOnPatch(int homePatchIndex);
  void sendForcesToPatch(PmeForceMsg *forceMsg);
  void setPencilProxy(CProxy_CudaPmePencilXYZ pmePencilXYZ_in);
private:
  // Energy/virial requested for the current step
  bool doVirial, doEnergy;
  // Range of neighboring pencils in y and z, and total block counts
  int ylo, yhi, zlo, zhi;
  int yNBlocks, zNBlocks;
  // Coordinates of this device's home pencil
  int pencilIndexY, pencilIndexZ;
  // How many neighbor pencils are expected to report
  int numNeighborsExpected;
  // Node-level locks guarding the shared counters and buffers below
  CmiNodeLock lock_numHomePatchesMerged;
  CmiNodeLock lock_numPencils;
  CmiNodeLock lock_numNeighborsRecv;
  CmiNodeLock lock_recvAtoms;
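  //
  // These counters are bumped from several PEs of the node, so each update
  // goes through its lock; a sketch of the intended discipline (not a quote
  // from the implementation):
  //
  //   CmiLock(lock_numNeighborsRecv);
  //   int n = ++numNeighborsRecv;
  //   CmiUnlock(lock_numNeighborsRecv);
  //   if (n == numNeighborsExpected) { /* all neighbors have reported */ }
  //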
  // Atom storage per pencil, double buffered (the index alternates between
  // the atom-receive step and the force-return step)
  std::vector< PmeAtomStorage* > pmeAtomStorage[2];
  // Marks the storages this device allocated (and must eventually free)
  std::vector<bool> pmeAtomStorageAllocatedHere;

  // Number of pencils each home patch contributed atoms to
  std::vector<int> numPencils[2];
  // Where a home patch's atoms ended up inside a pencil
  struct PencilLocation {
    // Pencil index
    int pp;
    // Position of this patch within the pencil's patch list
    int pencilPatchIndex;
    PencilLocation(int pp, int pencilPatchIndex) : pp(pp), pencilPatchIndex(pencilPatchIndex) {}
  };

  // For each home patch, the pencils its atoms were filed into
  std::vector< std::vector<PencilLocation> > plList[2];
  // Force messages for home patches, reused across steps
  std::vector< PmeForceMsg* > homePatchForceMsgs[2];

  // Home patch indices, per stored patch
  std::vector< std::vector<int> > homePatchIndexList[2];

  // Progress counters: how many neighbors / home patches have reported,
  // and how many home patches have had their forces merged
  int numNeighborsRecv;
  int numHomePatchesRecv;
  int numHomePatchesMerged;

  // Force messages from neighboring pencils, held until merging completes
  std::vector< PmeForcePencilMsg* > neighborForcePencilMsgs;
  std::vector<int> neighborPatchIndex;
  // One real-space PME compute and force buffer per charge grid (several
  // grids are active in alchemical runs); enabledGrid marks the live ones
  std::array<PmeRealSpaceCompute*, NUM_GRID_MAX> pmeRealSpaceComputes;
  std::array<bool, NUM_GRID_MAX> enabledGrid;
  std::array<size_t, NUM_GRID_MAX> forceCapacities;
  std::array<CudaForce*, NUM_GRID_MAX> forces;
  // Per-grid flag: has the force gather finished?
  std::array<int, NUM_GRID_MAX> forceReady;
  // Proxies to the manager, the atom filer, and the pencil chare arrays
  CProxy_ComputePmeCUDAMgr mgrProxy;
  CProxy_PmeAtomFiler pmeAtomFiler;
  CProxy_CudaPmePencilXYZ pmePencilXYZ;
  CProxy_CudaPmePencilXY pmePencilXY;
  CProxy_CudaPmePencilX pmePencilX;

  double beforeWalltime;
};
// Node-level manager: sets up the pencil decomposition, maps pencils to
// nodes and devices, and routes atoms from home patches to the right device.
class ComputePmeCUDAMgr : public CBase_ComputePmeCUDAMgr {
public:
  ComputePmeCUDAMgr_SDAG_CODE
  ComputePmeCUDAMgr();
  ComputePmeCUDAMgr(CkMigrateMessage *);
  ~ComputePmeCUDAMgr();
  void initialize(CkQdMsg *msg);
  void initialize_pencils(CkQdMsg *msg);
  void activate_pencils(CkQdMsg *msg);
  // Pencil (i,j) to node / device / device-ID mapping
  int getNode(int i, int j);
  int getDevice(int i, int j);
  int getDevicePencilY(int i, int j);
  int getDevicePencilZ(int i, int j);
  int getDeviceIDPencilX(int i, int j);
  int getDeviceIDPencilY(int i, int j);
  int getDeviceIDPencilZ(int i, int j);
  void recvPencils(CProxy_CudaPmePencilXYZ xyz);
  void recvPencils(CProxy_CudaPmePencilXY xy, CProxy_CudaPmePencilZ z);
  void recvPencils(CProxy_CudaPmePencilX x, CProxy_CudaPmePencilY y, CProxy_CudaPmePencilZ z);
  void createDevicesAndAtomFiler();
  void recvDevices(RecvDeviceMsg *msg);
  void recvAtomFiler(CProxy_PmeAtomFiler filer);
  void recvAtoms(PmeAtomMsg *msg);
  void getHomePencil(PatchID patchID, int &homey, int &homez);
  int getHomeNode(PatchID patchID);
  bool isPmeDevice(int deviceID);
  // Node-local access to this group's branch
  static ComputePmeCUDAMgr* Object() {
    CProxy_ComputePmeCUDAMgr mgrProxy(CkpvAccess(BOCclass_group).computePmeCUDAMgr);
    return mgrProxy.ckLocalBranch();
  }
private:
  // Cap the number of pencils at what the PME grid and PE count allow
  void restrictToMaxPMEPencils();
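  //
  // Typical node-local call pattern (sketch; patchID is hypothetical):
  //
  //   ComputePmeCUDAMgr* mgr = ComputePmeCUDAMgr::Object();
  //   int homey, homez;
  //   mgr->getHomePencil(patchID, homey, homez);
  //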
  // Number of nodes that have reported their home-patch counts
  int numNodesContributed;

  // Number of home patches on each node
  std::vector<int> numHomePatchesList;

  // PEs hosting the x, y, and z pencils
  std::vector<int> xPes;
  std::vector<int> yPes;
  std::vector<int> zPes;

  // (i,j) coordinates of the pencils hosted on this node, per pencil type
  std::vector<IJ> ijPencilX;
  std::vector<IJ> ijPencilY;
  std::vector<IJ> ijPencilZ;

  // Device assignment for every PME node
  std::vector<NodeDevice> nodeDeviceList;

  CProxy_PmeAtomFiler pmeAtomFiler;

  // One device proxy per PME device on this node
  std::vector<CProxy_ComputePmeCUDADevice> deviceProxy;

  std::vector<ExtraDevice> extraDevices;
  // Proxies to the pencil chare arrays: XYZ for the single-pencil setup,
  // XY+Z or X+Y+Z for the distributed decompositions
  CProxy_CudaPmePencilXYZ pmePencilXYZ;
  CProxy_CudaPmePencilXY pmePencilXY;
  CProxy_CudaPmePencilX pmePencilX;
  CProxy_CudaPmePencilY pmePencilY;
  CProxy_CudaPmePencilZ pmePencilZ;
};

#endif // NAMD_CUDA
#endif // COMPUTEPMECUDAMGR_H
 