12 #include "middle-conv.h"    15 #define MIN_DEBUG_LEVEL 3    19 #include "ComputeMgr.decl.h"    21 #include "ProxyMgr.decl.h"   110 #if defined(NAMD_CUDA) || defined(NAMD_HIP)   112 #define __thread __declspec(thread)   119     CkpvAccess(BOCclass_group).computeMgr = thisgroup;
   123     computeDPMEObject = 0;
   124     computeEwaldObject = 0;
   127     masterServerObject = NULL;
   132     delete computeNonbondedWorkArrays;
   133     if (masterServerObject != NULL) 
delete masterServerObject;
   134     for (
auto& loader: CudaGlobalMasterClientDlloaders) {
   136         iout << 
iINFO << 
"Close library " << loader->LibName() << 
"\n" << 
endi;
   137         loader->DLCloseLib();
   144     updateComputesReturnEP = ep;
   145     updateComputesReturnChareID = chareID;
   146     updateComputesCount = CkNumPes();
   150         NAMD_bug(
"updateComputes signaled on wrong Pe!");
   153     CkStartQD(CkIndex_ComputeMgr::updateComputes2((CkQdMsg*)0),&thishandle);
   160     CProxy_WorkDistrib wd(CkpvAccess(BOCclass_group).workDistrib);
   167     if ( skipSplitting ) {
   168       CProxy_ComputeMgr(thisgroup).updateLocalComputes();
   170       CProxy_ComputeMgr(thisgroup).splitComputes();
   177   if ( ! CkMyRank() ) {
   181     for (
int i=0; i<nc; i++) {
   185           CkPrintf(
"Warning: unable to partition compute %d\n", i);
   191         if (computeMap->
newNode(i) == -1) {
   194         for ( 
int j=1; j<nnp; ++j ) {
   205     CkStartQD(CkIndex_ComputeMgr::splitComputes2((CkQdMsg*)0), &thishandle);
   212     CProxy_ComputeMgr(thisgroup).updateLocalComputes();
   218     CProxy_ProxyMgr pm(CkpvAccess(BOCclass_group).proxyMgr);
   219     ProxyMgr *proxyMgr = pm.ckLocalBranch();
   225     for (
int i=0; i<nc; i++) {
   227         if ( computeMap->
node(i) == CkMyPe() &&
   233            if ( computeMap->
newNode(i) == CkMyPe() ) computeFlag.
add(i); 
   235         if (computeMap->
newNode(i) == CkMyPe() && computeMap->
node(i) != CkMyPe())
   238             for (
int n=0; n < computeMap->
numPids(i); n++)
   243         else if (computeMap->
node(i) == CkMyPe() &&
   244                  (computeMap->
newNode(i) != -1 && computeMap->
newNode(i) != CkMyPe() ))
   254         CkStartQD(CkIndex_ComputeMgr::updateLocalComputes2((CkQdMsg*)0), &thishandle);
   262     CProxy_ComputeMgr(thisgroup).updateLocalComputes3();
   269     CProxy_ProxyMgr pm(CkpvAccess(BOCclass_group).proxyMgr);
   270     ProxyMgr *proxyMgr = pm.ckLocalBranch();
   276     if ( ! CkMyRank() ) {
   277       for (
int i=0; i<nc; i++) {
   279         if (computeMap->
newNode(i) != -1) {
   286     for(
int i=0; i<computeFlag.
size(); i++) createCompute(computeFlag[i], computeMap);
   293         CkStartQD(CkIndex_ComputeMgr::updateLocalComputes4((CkQdMsg*)0), &thishandle);
   301     CProxy_ComputeMgr(thisgroup).updateLocalComputes5();
   318     if ( ! CkMyRank() ) {
   345         CkStartQD(CkIndex_ComputeMgr::doneUpdateLocalComputes(), &thishandle);
   352     DebugM(4, 
"doneUpdateLocalComputes on Pe("<<CkMyPe()<<
")\n");
   353     void *msg = CkAllocMsg(0,0,0);
   354     CkSendMsgBranch(updateComputesReturnEP,msg,0,updateComputesReturnChareID);
   358 #if defined(NAMD_CUDA) || defined(NAMD_HIP)   369 ComputeBondedCUDA* getComputeBondedCUDA() {
   391 #ifdef NODEGROUP_FORCE_REGISTER         392     CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
   393     PatchData *patchData = cpdata.ckLocalBranch();
   394     suspendCounter=&(patchData->suspendCounter);
   397     switch ( map->
type(i) )
   400 #if defined(NAMD_CUDA) || defined(NAMD_HIP)   404                                      computeNonbondedWorkArrays,
   412         for (
int j = 0; j < 8; j++) {
   413           pid8[j] = map->computeData[i].pids[j].pid;
   414           trans8[j] = map->computeData[i].pids[j].trans;
   417              computeNonbondedWorkArrays,
   425         pid2[0] = map->computeData[i].pids[0].pid;
   426         trans2[0] = map->computeData[i].pids[0].trans;
   427         pid2[1] = map->computeData[i].pids[1].pid;
   428         trans2[1] = map->computeData[i].pids[1].trans;
   429 #if defined(NAMD_CUDA) || defined(NAMD_HIP)   433                                      computeNonbondedWorkArrays,
   440 #if defined(NAMD_CUDA) || defined(NAMD_HIP)   449     case computeBondedCUDAType:
   450       c = createComputeBondedCUDA(i, 
this);
   456 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined(NAMD_HIP))   460           getComputeBondedCUDA()->registerCompute(map->computeData[i].node, map->
type(i), pids);
   471 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined (NAMD_HIP))   475           getComputeBondedCUDA()->registerCompute(map->computeData[i].node, map->
type(i), pids);
   486 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined (NAMD_HIP))   490           getComputeBondedCUDA()->registerCompute(map->computeData[i].node, map->
type(i), pids);
   501 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined (NAMD_HIP))   505           getComputeBondedCUDA()->registerCompute(map->computeData[i].node, map->
type(i), pids);
   516 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined (NAMD_HIP))   520           getComputeBondedCUDA()->registerCompute(map->computeData[i].node, map->
type(i), pids);
   531 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined (NAMD_HIP))   534           getComputeBondedCUDA()->registerCompute(map->computeData[i].node, map->
type(i), pids);
   545 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined (NAMD_HIP))   548         getComputeBondedCUDA()->registerCompute(map->computeData[i].node, map->
type(i), pids);
   559 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined (NAMD_HIP))   563           getComputeBondedCUDA()->registerCompute(map->computeData[i].node, map->
type(i), pids);
   574 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined (NAMD_HIP))   578           getComputeBondedCUDA()->registerCompute(map->computeData[i].node, map->
type(i), pids);
   602 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined (NAMD_HIP))   605           getComputeBondedCUDA()->registerSelfCompute(map->computeData[i].node, map->
type(i), map->computeData[i].pids[0].pid);
   615 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined (NAMD_HIP))   618           getComputeBondedCUDA()->registerSelfCompute(map->computeData[i].node, map->
type(i), map->computeData[i].pids[0].pid);
   628 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined (NAMD_HIP))   631           getComputeBondedCUDA()->registerSelfCompute(map->computeData[i].node, map->
type(i), map->computeData[i].pids[0].pid);
   641 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined (NAMD_HIP))   644           getComputeBondedCUDA()->registerSelfCompute(map->computeData[i].node, map->
type(i), map->computeData[i].pids[0].pid);
   654 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined (NAMD_HIP))   657           getComputeBondedCUDA()->registerSelfCompute(map->computeData[i].node, map->
type(i), map->computeData[i].pids[0].pid);
   667 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined (NAMD_HIP))   669           getComputeBondedCUDA()->registerSelfCompute(map->computeData[i].node, map->
type(i), map->computeData[i].pids[0].pid);
   679 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined (NAMD_HIP))   681           getComputeBondedCUDA()->registerSelfCompute(map->computeData[i].node, map->
type(i), map->computeData[i].pids[0].pid);
   691 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined (NAMD_HIP))   694           getComputeBondedCUDA()->registerSelfCompute(map->computeData[i].node, map->
type(i), map->computeData[i].pids[0].pid);
   704 #if defined(BONDED_CUDA) && (defined(NAMD_CUDA) || defined (NAMD_HIP))   706         getComputeBondedCUDA()->registerSelfCompute(map->computeData[i].node, map->
type(i), map->computeData[i].pids[0].pid);
   716     case computeDPMTAType:
   717         c = 
new ComputeDPMTA(i); 
   723     case computeDPMEType:
   724         c = computeDPMEObject = 
new ComputeDPME(i,
this); 
   730         c = 
new ComputePme(i,map->computeData[i].pids[0].pid); 
   734 #if defined(NAMD_CUDA) || defined(NAMD_HIP)   759         c = 
new ComputeStir(i,map->computeData[i].pids[0].pid); 
   795         c = 
new ComputeMsmMsa(i);
   848         NAMD_bug(
"Unknown compute type in ComputeMgr::createCompute().");
   854 #ifdef TRACE_COMPUTE_OBJECTS   859     int adim, bdim, cdim;
   861     int x1, y1, z1, x2, y2, z2;
   865         memset(user_des, 0, 50);
   866         switch ( map->
type(i) )
   869             sprintf(user_des, 
"computeNonBondedSelfType_%d_pid_%d", i, map->
pid(i,0));
   872             sprintf(user_des, 
"computeLCPOType_%d_pid_%d", i, map->
pid(i,0));
   879             t1 = map->
trans(i, 0);
   884             t2 = map->
trans(i, 1);
   891             sprintf(user_des, 
"computeNonBondedPairType_%d(%d,%d,%d)", i, dx,dy,dz);
   893 #if defined(NAMD_CUDA) || defined(NAMD_HIP)   895         case computeBondedCUDAType:
   896             sprintf(user_des, 
"computeBondedCUDAType_%d", i);
   901             sprintf(user_des, 
"computeExclsType_%d", i);
   904             sprintf(user_des, 
"computeBondsType_%d", i);
   907             sprintf(user_des, 
"computeAnglesType_%d", i);
   910             sprintf(user_des, 
"computeDihedralsType_%d", i);
   913             sprintf(user_des, 
"computeImpropersType_%d", i);
   916             sprintf(user_des, 
"computeTholeType_%d", i);
   919             sprintf(user_des, 
"computeAnisoType_%d", i);
   922             sprintf(user_des, 
"computeCrosstermsType_%d", i);
   925             sprintf(user_des, 
"computeOneFourNbTholeType_%d", i);
   928             sprintf(user_des, 
"computeSelfExclsType_%d", i);
   931             sprintf(user_des, 
"computeSelfBondsType_%d", i);
   934             sprintf(user_des, 
"computeSelfAnglesType_%d", i);
   937             sprintf(user_des, 
"computeSelfDihedralsType_%d", i);
   940             sprintf(user_des, 
"computeSelfImpropersType_%d", i);
   943             sprintf(user_des, 
"computeSelfTholeType_%d", i);
   946             sprintf(user_des, 
"computeSelfAnisoType_%d", i);
   949             sprintf(user_des, 
"computeSelfCrosstermsType_%d", i);
   952             sprintf(user_des, 
"computeSelfOneFourNbTholeType_%d", i);
   955         case computeDPMTAType:
   956             sprintf(user_des, 
"computeDPMTAType_%d", i);
   960         case computeDPMEType:
   961             sprintf(user_des, 
"computeDPMEType_%d", i);
   965             sprintf(user_des, 
"computePMEType_%d", i);
   967 #if defined(NAMD_CUDA) || defined(NAMD_HIP)   969             sprintf(user_des, 
"computePMECUDAType_%d", i);
   973             sprintf(user_des, 
"computeEwaldType_%d", i);
   976             sprintf(user_des, 
"computeFullDirectType_%d", i);
   979             sprintf(user_des, 
"computeGlobalType_%d", i);
   982             sprintf(user_des, 
"computeStirType_%d", i);
   985             sprintf(user_des, 
"computeExtType_%d", i);
   988             sprintf(user_des, 
"computeQMType_%d", i);
   991             sprintf(user_des, 
"computeEFieldType_%d", i);
   995             sprintf(user_des, 
"computeGridForceType_%d", i);
   999             sprintf(user_des, 
"computeSphericalBCType_%d", i);
  1002             sprintf(user_des, 
"computeCylindricalBCType_%d", i);
  1005             sprintf(user_des, 
"computeTclBCType_%d", i);
  1008             sprintf(user_des, 
"computeRestraintsType_%d", i);
  1011             sprintf(user_des, 
"computeConsForceType_%d", i);
  1014             sprintf(user_des, 
"computeConsTorqueType_%d", i);
  1017             NAMD_bug(
"Unknown compute type in ComputeMgr::registerUserEventForAllComputeObjs().");
  1020         int user_des_len = strlen(user_des);
  1021         char *user_des_cst = 
new char[user_des_len+1];
  1022         memcpy(user_des_cst, user_des, user_des_len);
  1023         user_des_cst[user_des_len] = 0;
  1044     int myNode = node->
myid();
  1046     if ( 
simParams->globalForcesOn && !myNode )
  1048         DebugM(4,
"Mgr running on Node "<<CkMyPe()<<
"\n");
  1053         #ifdef NODEGROUP_FORCE_REGISTER  1054         CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
  1055         PatchData *patchData = cpdata.ckLocalBranch();
  1056         patchData->master_mgr = 
this;
  1095 #if defined(NAMD_CUDA) || defined(NAMD_HIP)  1106     bool deviceIsMineBonded = (CkMyPe() == bondedMasterPe);
  1110     for (
int i=0; i < map->nComputes; i++)
  1112         if ( ! ( i % 100 ) )
  1116 #if defined(NAMD_CUDA) || defined(NAMD_HIP)  1117         switch ( map->
type(i) )
  1126             if ( ! deviceIsMine ) 
continue;
  1131             if ( ! deviceIsMine ) 
continue;
  1139               if ( ! deviceIsMineBonded ) 
continue;
  1142               if ( map->computeData[i].node != myNode ) 
continue;
  1149               if ( ! deviceIsMineBonded ) 
continue;
  1152               if ( map->computeData[i].node != myNode ) 
continue;
  1159               if ( ! deviceIsMineBonded ) 
continue;
  1162               if ( map->computeData[i].node != myNode ) 
continue;
  1169               if ( ! deviceIsMineBonded ) 
continue;
  1172               if ( map->computeData[i].node != myNode ) 
continue;
  1179               if ( ! deviceIsMineBonded ) 
continue;
  1182               if ( map->computeData[i].node != myNode ) 
continue;
  1189               if ( ! deviceIsMineBonded ) 
continue;
  1192               if ( map->computeData[i].node != myNode ) 
continue;
  1199               if ( ! deviceIsMineBonded ) 
continue;
  1202               if ( map->computeData[i].node != myNode ) 
continue;
  1209               if ( ! deviceIsMineBonded ) 
continue;
  1212               if ( map->computeData[i].node != myNode ) 
continue;
  1219               if ( ! deviceIsMineBonded ) 
continue;
  1222               if ( map->computeData[i].node != myNode ) 
continue;
  1226           case computeBondedCUDAType:
  1227             if ( ! deviceIsMineBonded ) 
continue;
  1228             if ( map->computeData[i].node != myNode ) 
continue;
  1230 #endif // BONDED_CUDA  1233             if ( ! deviceIsMine ) 
continue;
  1238             if ( map->computeData[i].node != myNode ) 
continue;
  1240 #else // defined(NAMD_CUDA) || defined(NAMD_HIP)  1241         if ( map->computeData[i].node != myNode ) 
continue;
  1243         DebugM(1,
"Compute " << i << 
'\n');
  1244         DebugM(1,
"  node = " << map->computeData[i].node << 
'\n');
  1245         DebugM(1,
"  type = " << map->computeData[i].type << 
'\n');
  1246         DebugM(1,
"  numPids = " << map->computeData[i].numPids << 
'\n');
  1248         for (
int j=0; j < map->computeData[i].numPids; j++)
  1250             DebugM(1,
"  pid " << map->computeData[i].pids[j].pid << 
'\n');
  1254         DebugM(1,
"\n---------------------------------------");
  1255         DebugM(1,
"---------------------------------------\n");
  1257         createCompute(i, map);
  1261 #if defined(NAMD_CUDA) || defined(NAMD_HIP)  1268       if (deviceIsMineBonded) {
  1269         getComputeBondedCUDA()->initialize();
  1289     else NAMD_die(
"ComputeMgr::computeGlobalObject is NULL!");
  1292 #ifdef NODEGROUP_FORCE_REGISTER  1298   #ifdef NODEGROUP_FORCE_REGISTER  1302     CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
  1303     PatchData *patchData = cpdata.ckLocalBranch();
  1304     CmiNodeLock &nl = patchData->nodeLock;
  1310       patchData->master_mgr->recvComputeGlobalData(msg);
  1326         patchData->master_mgr->recvComputeGlobalData(msg);
  1343     DebugM(3,
"["<<CkMyPe()<<
"] calling recvComputeGlobalResults\n");
  1347     CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
  1348     DebugM(3,
"["<<CkMyPe()<<
"] msg to recvComputeGlobalData\n");
  1349     cm[0].recvComputeGlobalData(msg);
  1350   #ifdef NODEGROUP_FORCE_REGISTER  1354   DebugM(3,
"["<<CkMyPe()<<
"] done sendComputeGlobalData\n");
  1360     if (masterServerObject)  
  1362       DebugM(3, 
"["<<CkMyPe()<<
"] recvComputeGlobalData calling recvData\n");
  1365     else NAMD_die(
"ComputeMgr::masterServerObject is NULL!");
  1373     DebugM(3,
"["<< CkMyPe()<< 
"] sendComputeGlobalResults seq "<<msg->
seq<<
"\n");
  1375   #ifdef NODEGROUP_FORCE_REGISTER  1380     for (
int pe = 0; pe < CkMyNodeSize(); pe++) {
  1381       if(CkpvAccessOther(ComputeGlobalResultsMsg_instance, pe)!=
nullptr)
  1385           delete CkpvAccessOther(ComputeGlobalResultsMsg_instance, pe);
  1399     DebugM(3,
"["<< CkMyPe() << 
"] ComputeMgr::sendComputeGlobalResults invoking bcast recvComputeGlobalResults\n");    
  1400     thisProxy.recvComputeGlobalResults(msg);
  1401   #ifdef NODEGROUP_FORCE_REGISTER  1431         CmiEnableUrgentSend(1);
  1435         CmiEnableUrgentSend(0);
  1446     else NAMD_die(
"ComputeMgr::computeGlobalObject is NULL!");
  1456     if (computeEwaldObject)
  1459         CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
  1460         cm[node].recvComputeEwaldData(msg);
  1467     else NAMD_die(
"ComputeMgr::computeEwaldObject is NULL!");
  1472     if (computeEwaldObject)
  1474     else NAMD_die(
"ComputeMgr::computeEwaldObject in recvData is NULL!");
  1484     if (computeEwaldObject) {
  1485         CmiEnableUrgentSend(1);
  1487         CmiEnableUrgentSend(0);
  1490     else NAMD_die(
"ComputeMgr::computeEwaldObject in recvResults is NULL!");
  1495     if ( computeDPMEObject )
  1498         int node = computeDPMEObject->getMasterNode();
  1499         CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
  1500         cm.recvComputeDPMEData(msg,node);
  1504     else NAMD_die(
"ComputeMgr::computeDPMEObject is NULL!");
  1509     if ( computeDPMEObject )
  1512         computeDPMEObject->recvData(msg);
  1516     else NAMD_die(
"ComputeMgr::computeDPMEObject is NULL!");
  1521     CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
  1522     cm[node].recvComputeDPMEResults(msg);
  1527     if ( computeDPMEObject )
  1530         computeDPMEObject->recvResults(msg);
  1534     else NAMD_die(
"ComputeMgr::computeDPMEObject is NULL!");
  1570 #ifdef NODEGROUP_FORCE_REGISTER  1573         CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
  1574         cpdata.setDeviceKernelUpdateCounter();
  1580 #if (defined(NAMD_CUDA) || defined(NAMD_HIP))  1584 #ifdef NODEGROUP_FORCE_REGISTER  1587         DebugM(3, 
"Call recvCudaGlobalMasterCreateMsg on master PE " << CkMyPe() << 
".\n");
  1590         std::shared_ptr<CudaGlobalMaster::CudaGlobalMasterClient> client = 
nullptr;
  1591         const std::string library_name = args[0];
  1593         std::shared_ptr<dlloader::DLLoader<CudaGlobalMaster::CudaGlobalMasterClient>> loader = 
nullptr;
  1594         for (
auto it = CudaGlobalMasterClientDlloaders.begin();
  1595              it != CudaGlobalMasterClientDlloaders.end(); ++it) {
  1596           if ((*it)->LibName() == library_name) {
  1602         if (loader == 
nullptr) {
  1606           iout << 
iINFO << 
"Loading library " << library_name
  1607                << 
" on PE: " << CkMyPe() << 
"\n" << 
endi;
  1608           loader->DLOpenLib();
  1609           client = loader->DLGetInstance();
  1610         } 
catch (std::exception& e) {
  1611           iout << 
iERROR << 
"Cannot load the shared library " << library_name << 
"\n" << 
endi;
  1616           client->initialize(args,
  1620           iout << 
iINFO << 
"CudaGlobalMaster client \"" << client->name()
  1621                 << 
"\"" << 
" initialized\n" << 
endi;
  1622         } 
catch (std::exception& e) {
  1623           iout << 
iERROR << 
"Cannot initialize the CudaGlobalMaster client from "  1624                 << library_name << 
"\n" << 
endi;
  1627         CudaGlobalMasterClientDlloaders.push_back(loader);
  1629         DebugM(3, 
"Skip recvCudaGlobalMasterCreateMsg on master PE " <<
  1630                   CkMyPe() << 
" that is not scheduled for GPU-resident global master.\n");
  1633       DebugM(3, 
"Skip recvCudaGlobalMasterCreateMsg on non-master PE " << CkMyPe() << 
".\n");
  1635 #endif // NODEGROUP_FORCE_REGISTER  1638       NAMD_die(
"GPU-resident mode is not enabled.\n");
  1641       NAMD_die(
"GPU-resident external forces are not enabled.\n");
  1645 #endif // (defined(NAMD_CUDA) || defined(NAMD_HIP))  1649 #if (defined(NAMD_CUDA) || defined(NAMD_HIP))  1652   const std::string client_name_to_remove = args[0];
  1654 #ifdef NODEGROUP_FORCE_REGISTER  1660           std::shared_ptr<CudaGlobalMaster::CudaGlobalMasterClient> c = 
nullptr;
  1661           const std::vector<std::shared_ptr<CudaGlobalMaster::CudaGlobalMasterClient>>& clients = gm->getClients();
  1662           for (
size_t i = 0; i < clients.size(); ++i) {
  1663             if (client_name_to_remove == clients[i]->name()) {
  1669             gm->removeClient(c);
  1670             iout << 
iINFO << 
"CudaGlobalMasterClient \""  1671                  << client_name_to_remove << 
"\" removed\n" << 
endi;
  1673             const std::string error = 
"CudaGlobalMasterClient \""  1674               + client_name_to_remove + 
"\" not found";
  1680 #endif // NODEGROUP_FORCE_REGISTER  1683       NAMD_die(
"GPU-resident mode is not enabled.\n");
  1686       NAMD_die(
"GPU-resident external forces are not enabled.\n");
  1689 #endif // (defined(NAMD_CUDA) || defined(NAMD_HIP))  1693 #if (defined(NAMD_CUDA) || defined(NAMD_HIP))  1694   std::vector<std::string> result_args;
  1697   const std::string client_name_to_update = args[0];
  1699   int error_code = TCL_OK;
  1704 #ifdef NODEGROUP_FORCE_REGISTER  1710           std::shared_ptr<CudaGlobalMaster::CudaGlobalMasterClient> c = 
nullptr;
  1711           const std::vector<std::shared_ptr<CudaGlobalMaster::CudaGlobalMasterClient>>& clients = gm->getClients();
  1712           for (
size_t i = 0; i < clients.size(); ++i) {
  1713             if (client_name_to_update == clients[i]->name()) {
  1719             result_args.push_back(client_name_to_update);
  1720             error_code = c->updateFromTCLCommand(args);
  1721             result_args.push_back(c->getTCLUpdateResult());
  1722             iout << 
iINFO << 
"CudaGlobalMasterClient \""  1723                  << client_name_to_update << 
"\" updated\n" << 
endi;
  1725             const std::string error = 
"CudaGlobalMasterClient \""  1726               + client_name_to_update + 
"\" not found";
  1732 #endif // NODEGROUP_FORCE_REGISTER  1735       NAMD_die(
"GPU-resident mode is not enabled.\n");
  1738       NAMD_die(
"GPU-resident external forces are not enabled.\n");
  1741   CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
  1742   cm[0].recvCudaGlobalMasterUpdateResultMsg(error_code, result_args);
  1743 #endif // (defined(NAMD_CUDA) || defined(NAMD_HIP))  1747   if (CkMyPe() == 0) {
  1748     if (!args.empty()) {
  1749       CudaGlobalMasterClientUpdateResults[args[0]] = tcl_error_code;
  1750       CudaGlobalMasterClientUpdateResultStrings[args[0]] = args[1];
  1753     const std::string error =
  1754       "recvCudaGlobalMasterUpdateResultMsg is called on " +
  1755       std::to_string(CkMyPe()) + 
" but expected on PE 0!\n";
  1761   return CudaGlobalMasterClientUpdateResults.at(client_name);
  1765   return CudaGlobalMasterClientUpdateResultStrings.at(client_name);
  1769     CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
  1770     cm[pe].recvYieldDevice(CkMyPe());
  1778 #if defined(NAMD_CUDA) || defined(NAMD_HIP)  1786   for (
int i=0;i < pes.size();i++) {
  1789     thisProxy[pes[i]].recvAssignPatchesOnPe(msg);
  1799   for (
int i=0;i < pes.size();i++) {
  1802     thisProxy[pes[i]].recvSkipPatchesOnPe(msg);
  1812   for (
int i=0;i < pes.size();i++) {
  1816     thisProxy[pes[i]].recvFinishPatchesOnPe(msg);
  1830   thisProxy[pe].recvFinishPatchOnPe(msg);
  1839   for (
int i=0;i < pes.size();i++) {
  1843     thisProxy[pes[i]].recvOpenBoxesOnPe(msg);
  1855   thisProxy[pe].recvFinishReductions(msg);
  1866   thisProxy[pe].recvMessageEnqueueWork(msg);
  1877   thisProxy[pe].recvLaunchWork(msg);
  1886   for (
int i=0;i < pes.size();i++) {
  1889     thisProxy[pes[i]].recvUnregisterBoxesOnPe(msg);
  1900 class ComputeBondedCUDAMsg : 
public CMessage_ComputeBondedCUDAMsg {
  1902   ComputeBondedCUDA* c;
  1907   for (
int i=0;i < pes.size();i++) {
  1908     ComputeBondedCUDAMsg *msg = 
new ComputeBondedCUDAMsg;
  1910     thisProxy[pes[i]].recvAssignPatchesOnPe(msg);
  1915   msg->c->assignPatchesOnPe();
  1920   ComputeBondedCUDAMsg *msg = 
new ComputeBondedCUDAMsg;
  1922   thisProxy[pe].recvMessageEnqueueWork(msg);
  1926   msg->c->messageEnqueueWork();
  1931   for (
int i=0;i < pes.size();i++) {
  1932     ComputeBondedCUDAMsg *msg = 
new (
PRIORITY_SIZE) ComputeBondedCUDAMsg;
  1935     thisProxy[pes[i]].recvOpenBoxesOnPe(msg);
  1940   msg->c->openBoxesOnPe();
  1944 void ComputeMgr::sendLoadTuplesOnPe(std::vector<int>& pes, ComputeBondedCUDA* c) {
  1945   for (
int i=0;i < pes.size();i++) {
  1946     ComputeBondedCUDAMsg *msg = 
new ComputeBondedCUDAMsg;
  1948     thisProxy[pes[i]].recvLoadTuplesOnPe(msg);
  1952 void ComputeMgr::recvLoadTuplesOnPe(ComputeBondedCUDAMsg *msg) {
  1953   msg->c->loadTuplesOnPe();
  1958   ComputeBondedCUDAMsg *msg = 
new ComputeBondedCUDAMsg;
  1960   thisProxy[pe].recvLaunchWork(msg);
  1964   msg->c->launchWork();
  1969   for (
int i=0;i < pes.size();i++) {
  1970     ComputeBondedCUDAMsg *msg = 
new (
PRIORITY_SIZE) ComputeBondedCUDAMsg;
  1973     thisProxy[pes[i]].recvFinishPatchesOnPe(msg);
  1978   msg->c->finishPatchesOnPe();
  1983   ComputeBondedCUDAMsg *msg = 
new ComputeBondedCUDAMsg;
  1985   thisProxy[pe].recvFinishReductions(msg);
  1989   msg->c->finishReductions();
  1994   for (
int i=0;i < pes.size();i++) {
  1995     ComputeBondedCUDAMsg *msg = 
new ComputeBondedCUDAMsg;
  1997     thisProxy[pes[i]].recvUnregisterBoxesOnPe(msg);
  2002   msg->c->unregisterBoxesOnPe();
  2006 #endif // BONDED_CUDA  2010 #include "ComputeMgr.def.h" 
CudaComputeNonbonded * createCudaComputeNonbonded(ComputeID c)
 
void finishPatchOnPe(int i)
 
#define COMPUTE_PROXY_PRIORITY
 
void recvComputeEwaldData(ComputeEwaldMsg *)
 
__thread DeviceCUDA * deviceCUDA
 
void updateLocalComputes()
 
#define NAMD_BONDEDGPU_IMPROPERS
 
#define NAMD_EVENT_STOP(eon, id)
 
std::ostream & iINFO(std::ostream &s)
 
void sendYieldDevice(int pe)
 
void recvData(ComputeEwaldMsg *)
 
virtual void initialize()
 
void recvComputeDPMEResults(ComputeDPMEResultsMsg *)
 
#define NAMD_BONDEDGPU_CROSSTERMS
 
void recvResults(ComputeEwaldMsg *)
 
void setNewNumPartitions(ComputeID cid, char numPartitions)
 
void recvResults(ComputeGlobalResultsMsg *)
 
std::shared_ptr< CudaGlobalMasterServer > getCudaGlobalMaster()
 
void saveComputeMap(const char *fname)
 
static ProxyMgr * Object()
 
#define NAMD_BONDEDGPU_ANISOS
 
#define TRACE_COMPOBJ_IDOFFSET
 
void updateLocalComputes5()
 
CudaComputeNonbonded * getCudaComputeNonbonded()
 
int getMasterNode() const
 
static PatchMap * Object()
 
void recvFinishPatchOnPe(CudaComputeNonbondedMsg *msg)
 
#define NAMD_BONDEDGPU_ONEFOURENBTHOLES
 
void buildProxySpanningTree2()
 
void sendMessageEnqueueWork(int pe, CudaComputeNonbonded *c)
 
#define NAMD_BONDEDGPU_ANGLES
 
#define NAMD_BONDEDGPU_THOLES
 
SimParameters * simParameters
 
void sendFinishReductions(int pe, CudaComputeNonbonded *c)
 
void registerCompute(ComputeID cid, Compute *c)
 
std::string getCudaGlobalMasterUpdateResultString(const std::string &client_name) const
 
void basePatchIDList(int pe, PatchIDList &)
 
void setNumPartitions(ComputeID cid, char numPartitions)
 
void recvComputeConsForceMsg(ComputeConsForceMsg *)
 
std::ostream & endi(std::ostream &s)
 
virtual void initialize()
 
#define PROXY_DATA_PRIORITY
 
void updateLocalComputes3()
 
void Migrate(LDObjHandle handle, int dest)
 
int getCudaGlobalMasterUpdateResult(const std::string &client_name) const
 
int index_a(int pid) const
 
#define NAMD_BONDEDGPU_DIHEDRALS
 
void recvMessageEnqueueWork(CudaComputeNonbondedMsg *msg)
 
void messageEnqueueWork()
 
int add(const Elem &elem)
 
void createComputes(ComputeMap *map)
 
void setNode(ComputeID cid, NodeID node)
 
void recvLaunchWork(CudaComputeNonbondedMsg *msg)
 
Molecule stores the structural information for the system. 
 
int computeGlobalResultsMsgSeq
 
void updateLocalComputes2(CkQdMsg *)
 
void recvAssignPatchesOnPe(CudaComputeNonbondedMsg *msg)
 
void doneUpdateLocalComputes()
 
int gridsize_c(void) const
 
void recvCudaGlobalMasterRemoveMsg(std::vector< std::string > args)
 
int getGlobalDevice() const
 
char newNumPartitions(ComputeID cid)
 
void recvComputeGlobalConfig(ComputeGlobalConfigMsg *)
 
int computeGlobalResultsMsgMasterSeq
 
void sendComputeEwaldData(ComputeEwaldMsg *)
 
void registerUserEventsForAllComputeObjs()
 
int gridsize_a(void) const
 
void sendLaunchWork(int pe, CudaComputeNonbonded *c)
 
#define NAMD_EVENT_START(eon, id)
 
static NAMD_HOST_DEVICE int offset_b(int i)
 
void recvYieldDevice(int pe)
 
void NAMD_bug(const char *err_msg)
 
static NAMD_HOST_DEVICE int offset_c(int i)
 
void sendComputeGlobalResults(ComputeGlobalResultsMsg *)
 
ComputeType type(ComputeID cid)
 
static ComputeCUDAMgr * getComputeCUDAMgr()
 
void removeUnusedProxies(void)
 
int index_b(int pid) const
 
void sendUnregisterBoxesOnPe(std::vector< int > &pes, CudaComputeNonbonded *c)
 
#define NAMD_BONDEDGPU_EXCLS
 
bool device_shared_with_pe(int pe)
 
int numPartitions(ComputeID cid)
 
void unregisterBoxesOnPe()
 
std::shared_ptr< CudaGlobalMasterServer > createCudaGlobalMaster()
 
bool getIsGlobalDevice() const
 
void sendFinishPatchesOnPe(std::vector< int > &pes, CudaComputeNonbonded *c)
 
void recvFinishReductions(CudaComputeNonbondedMsg *msg)
 
void createProxy(PatchID pid)
 
void setNewNode(ComputeID cid, NodeID node)
 
int partition(ComputeID cid)
 
void updateLocalComputes4(CkQdMsg *)
 
void NAMD_die(const char *err_msg)
 
static LdbCoordinator * Object()
 
void recvFinishPatchesOnPe(CudaComputeNonbondedMsg *msg)
 
void enableComputeGlobalResults()
 
void registerComputeSelf(ComputeID cid, PatchID pid)
 
void recvComputeGlobalResults(ComputeGlobalResultsMsg *)
 
ResizeArray< ComputeGlobalResultsMsg * > computeGlobalResultsMsgs
 
void recvComputeDPMEData(ComputeDPMEDataMsg *)
 
ComputeGlobal * computeGlobalObject
 
void recvData(ComputeGlobalDataMsg *)
 
void recvComputeEwaldResults(ComputeEwaldMsg *)
 
int index_c(int pid) const
 
void sendComputeDPMEData(ComputeDPMEDataMsg *)
 
void saveComputeMapChanges(int, CkGroupID)
 
void recvComputeGlobalData(ComputeGlobalDataMsg *)
 
void addClient(GlobalMaster *newClient)
 
void sendComputeDPMEResults(ComputeDPMEResultsMsg *, int)
 
static NAMD_HOST_DEVICE int offset_a(int i)
 
void updateComputes2(CkQdMsg *)
 
void registerComputePair(ComputeID cid, PatchID *pid, int *trans)
 
Compute * compute(ComputeID cid)
 
ComputeID cloneCompute(ComputeID src, int partition)
 
static ComputeMap * Object()
 
void recvOpenBoxesOnPe(CudaComputeNonbondedMsg *msg)
 
void del(int index, int num=1)
 
void recvUnregisterBoxesOnPe(CudaComputeNonbondedMsg *msg)
 
void sendComputeGlobalConfig(ComputeGlobalConfigMsg *)
 
void recvCudaGlobalMasterCreateMsg(std::vector< std::string > args)
 
void assignPatches(ComputeMgr *computeMgrIn)
 
CudaComputeNonbonded * getCudaComputeNonbonded()
 
void recvSkipPatchesOnPe(CudaComputeNonbondedMsg *msg)
 
CudaComputeNonbonded * createCudaComputeNonbonded(ComputeID c)
 
int numPids(ComputeID cid)
 
void recvCudaGlobalMasterUpdateResultMsg(int tcl_error_code, std::vector< std::string > args)
 
int gridsize_b(void) const
 
void sendComputeGlobalData(ComputeGlobalDataMsg *)
 
int pid(ComputeID cid, int i)
 
std::ostream & iERROR(std::ostream &s)
 
#define SET_PRIORITY(MSG, SEQ, PRIO)
 
int trans(ComputeID cid, int i)
 
void sendOpenBoxesOnPe(std::vector< int > &pes, CudaComputeNonbonded *c)
 
void sendFinishPatchOnPe(int pe, CudaComputeNonbonded *c, int i, PatchID patchID)
 
void recvCudaGlobalMasterUpdateMsg(std::vector< std::string > args)
 
void updateComputes(int, CkGroupID)
 
void sendComputeEwaldResults(ComputeEwaldMsg *)
 
colvarproxy_namd GlobalMasterColvars
 
void sendAssignPatchesOnPe(std::vector< int > &pes, CudaComputeNonbonded *c)
 
void splitComputes2(CkQdMsg *)
 
#define NAMD_BONDEDGPU_BONDS
 
#define PATCH_PRIORITY(PID)
 
void sendSkipPatchesOnPe(std::vector< int > &pes, CudaComputeNonbonded *c)
 
NodeID newNode(ComputeID cid)