9 #if defined(NAMD_CUDA) || defined(NAMD_HIP)    11 #define __thread __declspec(thread)    23   cudaPmeOneDevice = NULL;  
    24   cudaGlobalMasterObject = 
nullptr;
    32   NAMD_bug(
"ComputeCUDAMgr cannot be migrated");
    36   cudaPmeOneDevice = NULL;  
    37   cudaGlobalMasterObject = 
nullptr;
    44   for (
int i=0;i < numDevices;i++) {
    45     if (cudaNonbondedTablesList[i] != NULL) 
delete cudaNonbondedTablesList[i];
    46     if (cudaComputeNonbondedList[i] != NULL) 
delete cudaComputeNonbondedList[i];
    48     if (computeBondedCUDAList[i] != NULL) 
delete computeBondedCUDAList[i];
    64         if (msg != NULL) 
delete msg;
    67 #ifdef NODEGROUP_FORCE_REGISTER    68     CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
    69     PatchData *pdata = cpdata.ckLocalBranch();
    71     pdata->devData.resize(numDevices);
    75       allocate_host<bool*>(&(pdata->h_devHasForces),ndevs);
    76       allocate_host<double*>(&(pdata->h_soa_fb_x),  ndevs);
    77       allocate_host<double*>(&(pdata->h_soa_fb_y),  ndevs);
    78       allocate_host<double*>(&(pdata->h_soa_fb_z),  ndevs);
    80       allocate_host<double*>(&(pdata->h_soa_fn_x),  ndevs);
    81       allocate_host<double*>(&(pdata->h_soa_fn_y),  ndevs);
    82       allocate_host<double*>(&(pdata->h_soa_fn_z),  ndevs);
    84       allocate_host<double*>(&(pdata->h_soa_fs_x),  ndevs);
    85       allocate_host<double*>(&(pdata->h_soa_fs_y),  ndevs);
    86       allocate_host<double*>(&(pdata->h_soa_fs_z),  ndevs);
    88       allocate_host<double*>(&(pdata->h_soa_pos_x), ndevs);
    89       allocate_host<double*>(&(pdata->h_soa_pos_y), ndevs);
    90       allocate_host<double*>(&(pdata->h_soa_pos_z), ndevs);
   131       allocate_host<CudaExclusionStage*>(&(pdata->h_tupleDataStage.modifiedExclusion), 
deviceCUDA->
getNumDevice());
   137     allocate_host<unsigned int*>(&(pdata->d_queues), ndevs);
   138     allocate_host<unsigned int>(&(pdata->d_queueCounters), ndevs);
   140     cudaCheck(cudaMemset(pdata->d_queueCounters, 0, 
sizeof(
unsigned int)*ndevs));
   146     pdata->maxNumBonds.store(0);
   147     pdata->maxNumAngles.store(0);
   148     pdata->maxNumDihedrals.store(0);
   149     pdata->maxNumImpropers.store(0);
   150     pdata->maxNumModifiedExclusions.store(0);
   151     pdata->maxNumExclusions.store(0);
   152     pdata->maxNumCrossterms.store(0);
   153     pdata->devicePatchMapFlag.resize(CkNumPes(), 0);
   154 #ifdef NAMD_NCCL_ALLREDUCE   158     pdata->ncclId = 
deviceCUDA->getNcclUniqueId(); 
   164       allocate_device<double3*>(&
curSMDCOM, 
sizeof(double3*)*numDevices);
   174       for(
int i=0;i<
simParams->groupRestraintsCount;i++)
   176           allocate_device<double3*>(&
curGrp1COM[i], 
sizeof(double3*)*numDevices);
   177           allocate_device<double3*>(&
curGrp2COM[i], 
sizeof(double3*)*numDevices);
   188   cudaNonbondedTablesList.resize(numDevices, NULL);
   189   cudaComputeNonbondedList.resize(numDevices, NULL);
   191   computeBondedCUDAList.resize(numDevices, NULL);
   193   if (cudaPmeOneDevice != NULL) 
delete cudaPmeOneDevice;
   194   cudaPmeOneDevice = NULL;
   208   if ( CkMyRank() ) 
NAMD_bug(
"ComputeCUDAMgr::update() should be called only by rank 0");
   212     cudaNonbondedTablesList[deviceID]->updateTables();
   218   CProxy_ComputeCUDAMgr computeCUDAMgrProxy = CkpvAccess(BOCclass_group).computeCUDAMgr;
   219   ComputeCUDAMgr* computeCUDAMgr = computeCUDAMgrProxy.ckLocalBranch();
   220   if (computeCUDAMgr == NULL)
   221     NAMD_bug(
"getComputeCUDAMgr, unable to locate local branch of BOC entry ComputeCUDAMgr");
   222   return computeCUDAMgr;
   233   pmeGrid.
dim2 = pmeGrid.
K2;
   234   pmeGrid.
dim3 = 2 * (pmeGrid.
K3/2 + 1);
   245 #ifdef NODEGROUP_FORCE_REGISTER   249   if (cudaPmeOneDevice != NULL) 
delete cudaPmeOneDevice;
   251   return cudaPmeOneDevice;
   255   return cudaPmeOneDevice;
   263   if (cudaComputeNonbondedList.at(deviceID) != NULL)
   264     NAMD_bug(
"ComputeCUDAMgr::createCudaComputeNonbonded called twice");
   265   if (cudaNonbondedTablesList.at(deviceID) == NULL)
   266     NAMD_bug(
"ComputeCUDAMgr::createCudaComputeNonbonded, non-bonded CUDA tables not created");
   269   cudaComputeNonbondedList[deviceID] = 
new CudaComputeNonbonded(c, deviceID, *cudaNonbondedTablesList[deviceID], doStreaming);
   270   return cudaComputeNonbondedList[deviceID];
   281     NAMD_bug(
"ComputeCUDAMgr::getCudaComputeNonbonded(), device not created yet");
   289 ComputeBondedCUDA* ComputeCUDAMgr::createComputeBondedCUDA(
ComputeID c, 
ComputeMgr* computeMgr) {
   291   if (computeBondedCUDAList.at(deviceID) != NULL)
   292     NAMD_bug(
"ComputeCUDAMgr::createComputeBondedCUDA called twice");
   293   if (cudaNonbondedTablesList.at(deviceID) == NULL)
   294     NAMD_bug(
"ComputeCUDAMgr::createCudaComputeNonbonded, non-bonded CUDA tables not created");
   295   computeBondedCUDAList[deviceID] = 
new ComputeBondedCUDA(c, computeMgr, deviceID, *cudaNonbondedTablesList[deviceID]);
   296   return computeBondedCUDAList[deviceID];
   302 ComputeBondedCUDA* ComputeCUDAMgr::getComputeBondedCUDA() {
   305   ComputeBondedCUDA* p = computeBondedCUDAList[deviceID];
   307     NAMD_bug(
"ComputeCUDAMgr::getComputeBondedCUDA(), device not created yet");
   310 #endif // BONDED_CUDA   313   return cudaGlobalMasterObject;
   317   iout << 
iINFO << 
"Creating CUDAGlobalMaster on PE " << CkMyPe() << 
'\n' << 
endi;
   318   if (cudaGlobalMasterObject) {
   319     return cudaGlobalMasterObject;
   323   cudaGlobalMasterObject = std::make_shared<CudaGlobalMasterServer>(deviceID, 
simParams->cudaGlobalProfilingFreq);
   324   return cudaGlobalMasterObject;
   327 #endif // defined(NAMD_CUDA) || defined(NAMD_HIP)   329 #include "ComputeCUDAMgr.def.h" 
std::ostream & iINFO(std::ostream &s)
 
std::shared_ptr< CudaGlobalMasterServer > getCudaGlobalMaster()
 
SimParameters * simParameters
 
void initialize(CkQdMsg *msg)
 
std::ostream & endi(std::ostream &s)
 
int getGlobalDevice() const
 
void NAMD_bug(const char *err_msg)
 
static ComputeCUDAMgr * getComputeCUDAMgr()
 
std::shared_ptr< CudaGlobalMasterServer > createCudaGlobalMaster()
 
int getDeviceIDbyRank(int rank)
 
CudaComputeNonbonded * getCudaComputeNonbonded()
 
CudaComputeNonbonded * createCudaComputeNonbonded(ComputeID c)
 
__thread DeviceCUDA * deviceCUDA
 
CudaPmeOneDevice * createCudaPmeOneDevice()
 
CudaPmeOneDevice * getCudaPmeOneDevice()