9 #if defined(NAMD_CUDA) || defined(NAMD_HIP) 11 #define __thread __declspec(thread) 23 cudaPmeOneDevice = NULL;
24 cudaGlobalMasterObject =
nullptr;
32 NAMD_bug(
"ComputeCUDAMgr cannot be migrated");
36 cudaPmeOneDevice = NULL;
37 cudaGlobalMasterObject =
nullptr;
44 for (
int i=0;i < numDevices;i++) {
45 if (cudaNonbondedTablesList[i] != NULL)
delete cudaNonbondedTablesList[i];
46 if (cudaComputeNonbondedList[i] != NULL)
delete cudaComputeNonbondedList[i];
48 if (computeBondedCUDAList[i] != NULL)
delete computeBondedCUDAList[i];
64 if (msg != NULL)
delete msg;
// Fragment of the per-node setup that sizes PatchData bookkeeping for the CUDA
// devices. The enclosing function header is not visible in this listing;
// presumably this is ComputeCUDAMgr::initialize - TODO confirm.
// Only built when NODEGROUP_FORCE_REGISTER is defined: fetch the local
// PatchData branch through the BOCclass_group proxy.
67 #ifdef NODEGROUP_FORCE_REGISTER 68 CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
69 PatchData *pdata = cpdata.ckLocalBranch();
// One devData entry per device.
71 pdata->devData.resize(numDevices);
// Host-side arrays with one slot per device (ndevs entries each).
75 allocate_host<bool*>(&(pdata->h_devHasForces),ndevs);
106 allocate_host<CudaExclusionStage*>(&(pdata->h_tupleDataStage.modifiedExclusion),
deviceCUDA->
getNumDevice());
112 allocate_host<unsigned int*>(&(pdata->d_queues), ndevs);
113 allocate_host<unsigned int>(&(pdata->d_queueCounters), ndevs);
// Zero the ndevs counters allocated just above.
// NOTE(review): the buffer comes from allocate_host but is cleared with
// cudaMemset; that relies on the pointer being addressable by the CUDA
// runtime - confirm what allocate_host returns.
115 cudaCheck(cudaMemset(pdata->d_queueCounters, 0,
sizeof(
unsigned int)*ndevs));
// Reset the running maxima of the tuple counts to zero.
121 pdata->maxNumBonds.store(0);
122 pdata->maxNumAngles.store(0);
123 pdata->maxNumDihedrals.store(0);
124 pdata->maxNumImpropers.store(0);
125 pdata->maxNumModifiedExclusions.store(0);
126 pdata->maxNumExclusions.store(0);
127 pdata->maxNumCrossterms.store(0);
// One flag per PE, initialised to 0.
128 pdata->devicePatchMapFlag.resize(CkNumPes(), 0);
129 #ifdef NAMD_NCCL_ALLREDUCE 133 pdata->ncclId =
deviceCUDA->getNcclUniqueId();
// Per-device pointer tables for centre-of-mass buffers.
// NOTE(review): the allocate_host calls above are given an element count
// (ndevs), whereas these allocate_device calls are given a byte count
// (sizeof(double3*)*numDevices). If allocate_device also expects an element
// count, this over-allocates by a factor of sizeof(double3*) - confirm against
// the declaration of allocate_device.
139 allocate_device<double3*>(&
curSMDCOM,
sizeof(double3*)*numDevices);
// Same allocation for both groups of every group restraint.
149 for(
int i=0;i<
simParams->groupRestraintsCount;i++)
151 allocate_device<double3*>(&
curGrp1COM[i],
sizeof(double3*)*numDevices);
152 allocate_device<double3*>(&
curGrp2COM[i],
sizeof(double3*)*numDevices);
163 cudaNonbondedTablesList.resize(numDevices, NULL);
164 cudaComputeNonbondedList.resize(numDevices, NULL);
166 computeBondedCUDAList.resize(numDevices, NULL);
168 if (cudaPmeOneDevice != NULL)
delete cudaPmeOneDevice;
169 cudaPmeOneDevice = NULL;
183 if ( CkMyRank() )
NAMD_bug(
"ComputeCUDAMgr::update() should be called only by rank 0");
187 cudaNonbondedTablesList[deviceID]->updateTables();
// Fragment: body of the lookup whose message names it "getComputeCUDAMgr"
// (the enclosing signature and closing brace are not visible in this listing).
// Obtains the ComputeCUDAMgr proxy from BOCclass_group and asks it for the
// caller's local branch via ckLocalBranch().
193 CProxy_ComputeCUDAMgr computeCUDAMgrProxy = CkpvAccess(BOCclass_group).computeCUDAMgr;
194 ComputeCUDAMgr* computeCUDAMgr = computeCUDAMgrProxy.ckLocalBranch();
// A NULL local branch is reported through NAMD_bug before the pointer is returned.
195 if (computeCUDAMgr == NULL)
196 NAMD_bug(
"getComputeCUDAMgr, unable to locate local branch of BOC entry ComputeCUDAMgr");
197 return computeCUDAMgr;
208 pmeGrid.
dim2 = pmeGrid.
K2;
209 pmeGrid.
dim3 = 2 * (pmeGrid.
K3/2 + 1);
220 #ifdef NODEGROUP_FORCE_REGISTER 224 if (cudaPmeOneDevice != NULL)
delete cudaPmeOneDevice;
226 return cudaPmeOneDevice;
230 return cudaPmeOneDevice;
// Fragment of ComputeCUDAMgr::createCudaComputeNonbonded (name taken from the
// messages and the list used below). The signature, the assignments of
// `deviceID` and `doStreaming`, and the closing brace are not visible in this
// listing.
// Report a bug if a nonbonded compute is already stored for this device slot.
238 if (cudaComputeNonbondedList.at(deviceID) != NULL)
239 NAMD_bug(
"ComputeCUDAMgr::createCudaComputeNonbonded called twice");
// The constructor call below dereferences this device's tables entry, so the
// tables must already exist.
240 if (cudaNonbondedTablesList.at(deviceID) == NULL)
241 NAMD_bug(
"ComputeCUDAMgr::createCudaComputeNonbonded, non-bonded CUDA tables not created");
// Construct the compute for this device, keep it in the per-device list, and
// return the stored pointer.
244 cudaComputeNonbondedList[deviceID] =
new CudaComputeNonbonded(c, deviceID, *cudaNonbondedTablesList[deviceID], doStreaming);
245 return cudaComputeNonbondedList[deviceID];
256 NAMD_bug(
"ComputeCUDAMgr::getCudaComputeNonbonded(), device not created yet");
// Creates the ComputeBondedCUDA object for device slot `deviceID` and stores it
// in computeBondedCUDAList.
//   c          - compute ID forwarded to the ComputeBondedCUDA constructor
//   computeMgr - compute manager forwarded to the ComputeBondedCUDA constructor
// Returns the pointer stored in computeBondedCUDAList[deviceID].
// NOTE(review): the statement that assigns `deviceID` and the closing brace of
// this function are missing from this listing.
264 ComputeBondedCUDA* ComputeCUDAMgr::createComputeBondedCUDA(
ComputeID c,
ComputeMgr* computeMgr) {
// Report a bug if a bonded compute is already stored for this device slot.
266 if (computeBondedCUDAList.at(deviceID) != NULL)
267 NAMD_bug(
"ComputeCUDAMgr::createComputeBondedCUDA called twice");
// The constructor call below dereferences this device's nonbonded tables entry,
// so the tables must already exist.
// NOTE(review): the message names createCudaComputeNonbonded although this is
// createComputeBondedCUDA - looks copy-pasted; confirm and correct the text.
268 if (cudaNonbondedTablesList.at(deviceID) == NULL)
269 NAMD_bug(
"ComputeCUDAMgr::createCudaComputeNonbonded, non-bonded CUDA tables not created");
// Construct the bonded compute, keep it in the per-device list, and return it.
270 computeBondedCUDAList[deviceID] =
new ComputeBondedCUDA(c, computeMgr, deviceID, *cudaNonbondedTablesList[deviceID]);
271 return computeBondedCUDAList[deviceID];
277 ComputeBondedCUDA* ComputeCUDAMgr::getComputeBondedCUDA() {
280 ComputeBondedCUDA* p = computeBondedCUDAList[deviceID];
282 NAMD_bug(
"ComputeCUDAMgr::getComputeBondedCUDA(), device not created yet");
285 #endif // BONDED_CUDA 288 return cudaGlobalMasterObject;
// Fragment of the CUDAGlobalMaster creation routine (presumably
// ComputeCUDAMgr::createCudaGlobalMaster - signature not visible; confirm).
// NOTE(review): this message is emitted before the "already exists" check
// below, so it is also printed when the existing object is simply returned.
292 iout <<
iINFO <<
"Creating CUDAGlobalMaster on PE " << CkMyPe() <<
'\n' <<
endi;
// Return the existing server object if one has already been created.
293 if (cudaGlobalMasterObject) {
294 return cudaGlobalMasterObject;
// Construct the server from `deviceID` and simParams->cudaGlobalProfilingFreq,
// store it in cudaGlobalMasterObject, and return it. (The lines between the
// early return above and this statement are missing from the listing.)
298 cudaGlobalMasterObject = std::make_shared<CudaGlobalMasterServer>(deviceID,
simParams->cudaGlobalProfilingFreq);
299 return cudaGlobalMasterObject;
302 #endif // defined(NAMD_CUDA) || defined(NAMD_HIP) 304 #include "ComputeCUDAMgr.def.h"
std::ostream & iINFO(std::ostream &s)
std::shared_ptr< CudaGlobalMasterServer > getCudaGlobalMaster()
SimParameters * simParameters
void initialize(CkQdMsg *msg)
std::ostream & endi(std::ostream &s)
int getGlobalDevice() const
void NAMD_bug(const char *err_msg)
static ComputeCUDAMgr * getComputeCUDAMgr()
std::shared_ptr< CudaGlobalMasterServer > createCudaGlobalMaster()
int getDeviceIDbyRank(int rank)
CudaComputeNonbonded * getCudaComputeNonbonded()
CudaComputeNonbonded * createCudaComputeNonbonded(ComputeID c)
__thread DeviceCUDA * deviceCUDA
CudaPmeOneDevice * createCudaPmeOneDevice()
CudaPmeOneDevice * getCudaPmeOneDevice()