10 #include "ProxyMgr.decl.h"    31 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR) && (CMK_SMP) && defined(NAMDSRC_IMMQD_HACK)    39 #define MIN_DEBUG_LEVEL 2    42 #define ALLOCA(TYPE,NAME,SIZE) TYPE *NAME = (TYPE *) alloca((SIZE)*sizeof(TYPE))    59   msg_size += 
sizeof(msg->
node);
    60   msg_size += 
sizeof(msg->
patch);
    65     msg_size += 
sizeof(array_size);
    66     msg_size += array_size * 
sizeof(char);    
    69     int nonzero_count = 0;
    70     for ( 
int i = 0; i < array_size; ++i ) {
    71       if ( 
f[i].x != 0. || 
f[i].y != 0. || 
f[i].z != 0. ) { ++nonzero_count; }
    73     msg_size += nonzero_count * 
sizeof(
Vector);
    76   void *msg_buf = CkAllocBuffer(msg,msg_size);
    77   char *msg_cur = (
char *)msg_buf;
    79   CmiMemcpy((
void*)msg_cur,(
void*)(&(msg->
node)),
sizeof(msg->
node));
    80   msg_cur += 
sizeof(msg->
node);
    81   CmiMemcpy((
void*)msg_cur,(
void*)(&(msg->
patch)),
sizeof(msg->
patch));
    82   msg_cur += 
sizeof(msg->
patch);
    85     *(
int *) msg_cur = array_size;
    86     msg_cur += 
sizeof(int);
    87     char *nonzero = msg_cur;
    88     msg_cur += array_size * 
sizeof(char);
    89     msg_cur = (
char *)
ALIGN_8 (msg_cur);
    93     for ( 
int i = 0; i < array_size; ++i ) {
    94       if ( 
f[i].x != 0. || 
f[i].y != 0. || 
f[i].z != 0. ) {
   104     msg_cur = (
char *) farr;      
   115   char *msg_cur = (
char*)ptr;
   117   CmiMemcpy((
void*)(&(msg->
node)),(
void*)msg_cur,
sizeof(msg->
node));
   118   msg_cur += 
sizeof(msg->
node);
   119   CmiMemcpy((
void*)(&(msg->
patch)),(
void*)msg_cur,
sizeof(msg->
patch));
   120   msg_cur += 
sizeof(msg->
patch);
   123     int array_size = *(
int *) msg_cur;
   124     msg_cur += 
sizeof(array_size);
   125     msg->
forceList[j] = &(msg->forceListInternal[j]);
   127     char *nonzero = msg_cur;
   128     msg_cur += array_size * 
sizeof(char);    
   129     msg_cur = (
char *)
ALIGN_8 (msg_cur);
   132     for ( 
int i = 0; i < array_size; ++i ) {
   139         f[i].x = 0.;  
f[i].y = 0.;  
f[i].z = 0.;
   142     msg_cur = (
char *) farr;
   155         tmpLen[i] = fls[i].
size();
   156         iszeroLen += tmpLen[i];
   158     char *tmpIszero = 
new char[iszeroLen];
   159     char *iszeroPtr = tmpIszero;
   163         for(
int j=0; j<tmpLen[i]; j++, fiPtr++, iszeroPtr++) {         
   164             if(fiPtr->x!=0.0 || fiPtr->y!=0.0 || fiPtr->z!=0) {
   178     iszeroPtr = tmpIszero;
   182         for(
int j=0; j<tmpLen[i]; j++, fiPtr++, iszeroPtr++) {
   183             if((*iszeroPtr)!=1) {
   184                 forcePtr->
x = fiPtr->x;
   185                 forcePtr->
y = fiPtr->y;
   186                 forcePtr->
z = fiPtr->z;
   191     memcpy(retmsg->
isZero, tmpIszero, 
sizeof(
char)*iszeroLen);
   198     for(
int i=0; i<size; i++) {
   199         numAllPes += tree[i].
numPes;
   205     int *pAllPes = retmsg->
allPes;
   206     for(
int i=0; i<size; i++) {
   208         for(
int j=0; j<tree[i].
numPes; j++) {
   209             *pAllPes = tree[i].
peIDs[j];
   218 #ifdef PROCTRACE_DEBUG   219     DebugFileTrace *dft = DebugFileTrace::Object();
   221     const char *patchname = 
"ProxyPatch";
   222     if(
procID == CkMyPe()) patchname = 
"HomePatch";
   223     dft->writeTrace(
"%s: %s[%d] on proc %d node %d has ST (src %d) with %d nodes\n", 
   229     dft->writeTrace(
"%s: ===%d===pes/node: ", tag, 
patch);
   233     dft->writeTrace(
"\n%s: ===%d===pe list: ", tag, 
patch);
   237             dft->writeTrace(
"%d ", *p);
   241     dft->writeTrace(
"\n");    
   249   int nonzero_count = 0;
   253     totalFLLen +=  array_size;
   255     for ( 
int i = 0; i < array_size; ++i ) {
   256       if ( 
f[i].x != 0. || 
f[i].y != 0. || 
f[i].z != 0. ) { ++nonzero_count; }
   263          envelope *oenv = UsrToEnv(msg);
   264          envelope *nenv = UsrToEnv(msg_buf);
   265          CmiMemcpy(nenv->getPrioPtr(), oenv->getPrioPtr(), nenv->getPrioBytes());
   269   for (
int i=0; i<nodeSize; i++) {
   272   #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)   273   msg_buf->destPe = msg->destPe;
   274   #if CMK_SMP && defined(NAMDSRC_IMMQD_HACK)   275   msg_buf->isFromImmMsgCall = msg->isFromImmMsgCall;
   284     msg_buf->
flLen[j] = array_size;
   286     for ( 
int i = 0; i < array_size; ++i , isNonZeroPtr++) {
   287       if ( 
f[i].x != 0. || 
f[i].y != 0. || 
f[i].z != 0. ) {
   309   for (
int i=0; i<ptr->
nodeSize; i++) {
   312   #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)   313   msg->destPe = ptr->destPe;
   314   #if CMK_SMP && defined(NAMDSRC_IMMQD_HACK)   315   msg->isFromImmMsgCall = ptr->isFromImmMsgCall;
   324     int array_size = ptr->
flLen[j];
   325     msg->
forceList[j] = &(msg->forceListInternal[j]);
   329     for ( 
int i = 0; i < array_size; ++i, nonzero++ ) {
   336         f[i].x = 0.;  
f[i].y = 0.;  
f[i].z = 0.;
   349   if (CkpvAccess(ProxyMgr_instance)) {
   350     NAMD_bug(
"Tried to create ProxyMgr twice.");
   352   CkpvAccess(ProxyMgr_instance) = 
this;
   357   CkpvAccess(ProxyMgr_instance) = NULL;
   362   if(CkMyRank()!=0) 
return; 
   371   if(CkMyRank()!=0) 
return;
   380     if(CkMyRank()!=0) 
return;
   391   for ( pi = pi.
begin(); pi != pi.
end(); pi++)
   393     delete pi->proxyPatch;
   402   for ( pi = pi.
begin(); pi != pi.
end(); pi++)
   404     if ( pi->proxyPatch->getNumComputes() == 0 ) {
   405       toDelete.
add(pi->patchID);
   410   for ( ; pidi != toDelete.
end(); ++pidi ) {
   423   int myNode = CkMyPe();
   424   enum PatchFlag { Unknown, Home, NeedProxy };
   425   int *patchFlag = 
new int[numPatches]; 
   429   for ( i = 0; i < numPatches; ++i )
   431     patchFlag[i] = ( patchMap->
node(i) == myNode ) ? Home : Unknown;
   434 #if !(defined(NAMD_CUDA) || defined(NAMD_HIP))   439   for ( i = 0; i < basepids.
size(); ++i )
   441     if ( patchMap->
node(basepids[i]) != myNode ) {
   442         patchFlag[basepids[i]] = NeedProxy;
   445     for ( j = 0; j < numNeighbors; ++j )
   447       if ( ! patchFlag[neighbors[j]] ) {
   448         patchFlag[neighbors[j]] = NeedProxy;
   458   for ( i = 0; i < nc; ++i )
   460 #if defined(NAMD_CUDA) || defined(NAMD_HIP)   464 #elif defined(NAMD_MIC)   466     if ( computeMap->directToDevice(i) != 0 ) { 
continue; } 
   468     if ( computeMap->
node(i) != myNode ) 
   470     int numPid = computeMap->
numPids(i);
   471     for ( j = 0; j < numPid; ++j )
   473       int pid = computeMap->
pid(i,j);
   474       if ( ! patchFlag[pid] ) {
   475         patchFlag[pid] = NeedProxy;
   480   for ( i = 0; i < numPatches; ++i ) {
   481     if ( patchFlag[i] == NeedProxy )
   495      DebugM(4,
"createProxy("<<pid<<
")\n");
   527   CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
   528   cp[node].recvRegisterProxy(msg);
   547   CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
   548   cp[node].recvUnregisterProxy(msg);
   564   for (
int i=0; i<pids.
size(); i++) {
   566     if (home == NULL) CkPrintf(
"ERROR: homepatch NULL\n");
   567 #ifdef NODEAWARE_PROXY_SPANNINGTREE   568     home->buildNodeAwareSpanningTree();
   583   for (
int i=0; i<pids.
size(); i++) {
   585     if (home == NULL) CkPrintf(
"ERROR: homepatch NULL\n");
   591   for(iter=iter.
begin(); iter!=iter.
end(); iter++) {
   601   CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
   602   cp[0].recvProxies(pid, list, n);
   608 #define MAX_INTERNODE 1   612         FILE *ofp = fopen(
"patch_proxylist.txt", 
"w");
   613         std::vector<int> plist;
   614         for(
int i=0; i<np; i++) {
   615                 fprintf(ofp, 
"%d: ", i);
   617                 fprintf(ofp, 
"#%d ", listlen);
   619                 for(
int j=0; j<listlen; j++) {
   622                 std::sort(plist.begin(), plist.end());
   623                 for(
int j=0; j<listlen; j++) {
   624                         fprintf(ofp, 
"%d ", plist[j]);
   639   for (
int i=0; i<n; i++)
   649 #ifdef NODEAWARE_PROXY_SPANNINGTREE   650     buildNodeAwareSpanningTree0();
   663         for(
int i=0; i<nPatches; i++) {
   667                 for(
int j=0; j<plen; j++) {
   679 #ifdef NODEAWARE_PROXY_SPANNINGTREE   680     buildNodeAwareSpanningTree0();
   718   for (i=0; i<CkNumPes(); i++) 
procidx[i] = i;
   733   else if(CkNumPes()<4097)
   735   else if(CkNumPes()<8193)
   737   else if(CkNumPes()<16385)
   741   for (
int i=0; i<exclude; i++) 
if (
procidx[i] == p) 
return 1;
   746 #ifdef NODEAWARE_PROXY_SPANNINGTREE   748 void ProxyMgr::buildNodeAwareSpanningTree0(){
   749         CkPrintf(
"Info: build node-aware spanning tree with send: %d, recv: %d with branch factor %d\n", 
   753     for (
int pid=0; pid<numPatches; pid++)     
   754         buildSinglePatchNodeAwareSpanningTree(pid, ptree.
proxylist[pid], ptree.naTrees[pid]);
   777     for(;pid<numPatches; pid++) {
   780     if(pid==numPatches) {
   783     int *proxyNodeMap = 
new int[CkNumNodes()];
   784     memset(proxyNodeMap, 0, 
sizeof(
int)*CkNumNodes());
   790     for(
int i=1; i<lastInterNodeIdx; i++) { 
   791         int nid = onePatchT.
item(i).nodeID;
   800     for(; pid<numPatches; pid++) {
   804         for(
int i=1; i<=lastInterNodeIdx; i++) {
   805             int nid = onePatchT.
item(i).nodeID;
   815             int leastAmount = ~(1<<31);
   818             for(swapPos=lastInterNodeIdx+1; swapPos<onePatchT.
size(); swapPos++) {
   819                 int chiNId = onePatchT.
item(swapPos).nodeID;
   823                 if(proxyNodeMap[chiNId]<leastAmount) {
   824                     leastAmount = proxyNodeMap[chiNId];
   828             if(swapPos==onePatchT.
size()) {
   829                 CmiAssert(leastIdx!=-1); 
   837             proxyNodeMap[swapNode->
nodeID]++; 
   838             int tmp = curNode->
nodeID;
   844             int *tmpPes = curNode->
peIDs;
   846             swapNode->
peIDs = tmpPes;
   849     delete [] proxyNodeMap;    
   859     if(CmiMyNodeSize()==1) {
   865     for(;pid<numPatches; pid++) {
   868     if(pid==numPatches) {
   871     int *proxyCoreMap = 
new int[CkNumPes()];
   872     memset(proxyCoreMap, 0, 
sizeof(
int)*CkNumPes());
   878     for(
int i=1; i<lastInterNodeIdx; i++) { 
   879         int rootProcID = onePatchT.
item(i).peIDs[0];
   880         proxyCoreMap[rootProcID]++;
   886     for(; pid<numPatches; pid++) {
   890         for(
int i=1; i<=lastInterNodeIdx; i++) {
   892             int rootProcID = curNode->
peIDs[0];
   897                 proxyCoreMap[rootProcID]++;
   904             int leastAmount = ~(1<<31);
   908             for(swapPos=1; swapPos<curNode->
numPes; swapPos++) {
   909                 int otherCoreID = curNode->
peIDs[swapPos];
   913                 if(proxyCoreMap[otherCoreID]<leastAmount) {
   914                     leastAmount = proxyCoreMap[otherCoreID];
   918             if(swapPos==curNode->
numPes) {
   919                 CmiAssert(leastIdx!=-1); 
   924             int tmp = curNode->
peIDs[swapPos];
   926             curNode->
peIDs[0] = tmp;
   938 void ProxyMgr::buildSinglePatchNodeAwareSpanningTree(
PatchID pid, 
NodeIDList &proxyList, 
   951     std::map<int, int> proxyNodeMap; 
   952     std::vector<int> proxyNodeIDs;
   953     std::map<int, int> proxyTreeIdx; 
   957     int hpNodeID = CkNodeOf(hpProcID);
   958     proxyNodeMap[hpNodeID]=1;
   959     proxyTreeIdx[hpNodeID]=0;
   960     proxyNodeIDs.push_back(hpNodeID);
   965         int procId = proxyList[i];
   966         int nodeId = CkNodeOf(procId);
   967         std::map<int, int>::iterator it=proxyNodeMap.find(nodeId);
   968         if(it==proxyNodeMap.end()) {
   969             proxyNodeMap[nodeId] = 1;
   970             proxyTreeIdx[nodeId] = proxyNodeIDs.size();
   971             proxyNodeIDs.push_back(nodeId);
   973             proxyNodeMap[nodeId]++;
   977     int numNodesWithProxies = proxyNodeIDs.
size();
   978     oneNATree.
resize(numNodesWithProxies);
   980     for(
int i=0; i<numNodesWithProxies; i++) {
   982         delete [] oneNode->
peIDs;
   983         oneNode->
nodeID = proxyNodeIDs[i];
   984         oneNode->
peIDs = 
new int[proxyNodeMap[oneNode->
nodeID]];                        
   990     rootnode->
peIDs[0] = hpProcID;
   994         int procId = proxyList[i];
   995         int nodeId = CkNodeOf(procId);
   996         int idxInTree = proxyTreeIdx[nodeId];
   997         CmiAssert(idxInTree>=0 && idxInTree<numNodesWithProxies);
  1003 #else //branch of NODEAWARE_PROXY_SPANNINGTREE  1008         CkPrintf(
"Info: build spanning tree with send: %d, recv: %d with branch factor %d\n", 
  1015   int *numPatchesOnNode = 
new int[CkNumPes()];
  1016   int numNodesWithPatches = 0;
  1017   for (i=0; i<CkNumPes(); i++) numPatchesOnNode[i] = 0;
  1019   for (i=0; i<numPatches; i++) {
  1021     numPatchesOnNode[node]++;
  1022     if (numPatchesOnNode[node] == 1)
  1023       numNodesWithPatches ++;
  1025   int patchNodesLast =
  1026     ( numNodesWithPatches < ( 0.7 * CkNumPes() ) );
  1027   int *ntrees = 
new int[CkNumPes()];
  1028   for (i=0; i<CkNumPes(); i++) ntrees[i] = 0;
  1030   for (
int pid=0; pid<numPatches; pid++) 
  1037       delete [] numPatchesOnNode;
  1053       int oldindex = oldtree.
find(p);
  1054       if (oldindex != -1 && oldindex <= 
numProxies) {
  1056         if (!isIntermediate) {
  1068       if (tree.
find(p) != -1) 
continue;        
  1070       if (patchNodesLast && numPatchesOnNode[p] ) {
  1071         while (tree[e] != -1) { e--; 
if (e==-1) e = 
numProxies; }
  1074         if (isIntermediate) ntrees[p]++;
  1077         while (tree[s] != -1) { s++; 
if (s==
numProxies+1) s = 1; }
  1081           while (tree[e] != -1) { e--; 
if (e==-1) e = 
numProxies; }
  1084           if (isIntermediate) ntrees[p]++;
  1089           if (isIntermediate) ntrees[p]++;
  1095       ptree.
sizes[pid] = treesize;
  1102   delete [] numPatchesOnNode;
  1109   for (
int pid=0; pid<numPatches; pid++) {
  1111 #ifdef NODEAWARE_PROXY_SPANNINGTREE  1129   CProxy_ProxyMgr cp(thisgroup);
  1141   CProxy_ProxyMgr cp(thisgroup);
  1155   CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
  1156   cp[msg->
tree[0]].recvSpanningTree(msg);
  1160   CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
  1163 #if defined(PROCTRACE_DEBUG) && defined(NAST_DEBUG)  1164   DebugFileTrace *dft = DebugFileTrace::Object();
  1166   dft->writeTrace(
"PMgr::sndST: from proc %d for patch[%d]\n", pe, msg->
patch);
  1170   cp[pe].recvNodeAwareSpanningTree(msg);
  1181     if (size > i+1) { child[i] = msg->
tree[i+1]; nChild++; }
  1196   int level = 1, index=1;
  1199     for (
int n=0; n<nChild; n++) {
  1201       for (
int j=0; j<level; j++) {
  1202        if (index >= size) { done = 1; 
break; }
  1203        tree[n].
add(msg->
tree[index]);
  1212     if (tree[i].size()) {
  1215       cmsg->
node = CkMyPe();
  1230 #if defined(PROCTRACE_DEBUG) && defined(NAST_DEBUG)  1231     DebugFileTrace *dft = DebugFileTrace::Object();
  1242     int eNChild = treesize-1; 
  1244     CmiAssert(treesize>0);
  1253             iNChild = (iSlots>iNChild)?iNChild:iSlots;
  1256     int numChild = iNChild + eNChild;
  1261 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)  1268                 int onSameNode = (CkMyNode() == CkNodeOf(msg->
procID));
  1274             CProxy_NodeProxyMgr pm(CkpvAccess(BOCclass_group).nodeProxyMgr);
  1279         proxy->setSTNodeChildren(0, NULL);       
  1294         ALLOCA(
int,children,numChild);
  1297         for(
int i=0; i<eNChild; i++) {
  1302         for(
int i=eNChild, j=1; i<numChild; i++, j++) {
  1303             children[i] = msg->
allPes[j]; 
  1307 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)  1308                 int onSameNode = (CkMyNode() == CkNodeOf(msg->
procID));
  1311                         CProxy_NodeProxyMgr pm(CkpvAccess(BOCclass_group).nodeProxyMgr);
  1316                         ALLOCA(
int,nodeChildren,eNChild+1);
  1318                         for(
int i=0; i<eNChild; i++) {
  1319                                 nodeChildren[i] = CkNodeOf(*p);
  1323                         nodeChildren[eNChild] = CkNodeOf(msg->
allPes[0]);
  1324                         proxy->setSTNodeChildren(eNChild+1, nodeChildren);
  1326                         proxy->setSTNodeChildren(0, NULL);
  1343             for(
int childID=0; childID<eNChild; childID++) {
  1345                 for(
int i=0; i<nodesToCnt; i++) {
  1347                     exTreeChildSize[childID].
add(cursize);
  1348                     exTreeChildPtr[childID].
add(pePtr);
  1367         int *pePtr = msg->
allPes+1; 
  1370             for(
int childID=eNChild; childID<numChild; childID++) {
  1372                 for(
int i=0; i<nodesToCnt; i++) {                    
  1373                     exTreeChildSize[childID].
add(1);
  1374                     exTreeChildPtr[childID].
add(pePtr);
  1388     for(
int i=0; i<numChild; i++) {                
  1391         int totalNodes = allSizes->
size();
  1393         for(
int j=0; j<totalNodes; j++) totalPes += allSizes->
item(j);
  1398         int *pAllPes = cmsg->
allPes;
  1399         for(
int j=0; j<totalNodes; j++) {
  1400             int numPes = allSizes->
item(j);
  1402             memcpy(pAllPes, allPtrs->
item(j), 
sizeof(int)*numPes);
  1405         #if defined(PROCTRACE_DEBUG) && defined(NAST_DEBUG)  1410     delete [] exTreeChildSize;
  1411     delete [] exTreeChildPtr;  
  1416 #if defined(PROCTRACE_DEBUG) && defined(NAST_DEBUG)  1417     DebugFileTrace *dft = DebugFileTrace::Object();
  1419     dft->writeTrace(
"PMgr::recvSTParent: for ProxyPatch[%d], parent is %d\n", patch, parent);
  1423     CmiAssert(proxy!=NULL);
  1428     CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
  1430     CmiEnableUrgentSend(1);
  1431     cp[node].recvResults(msg);
  1432     CmiEnableUrgentSend(0);
  1441   CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
  1443   CmiEnableUrgentSend(1);
  1444   cp[node].recvResults(msg);
  1445   CmiEnableUrgentSend(0);
  1460     CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
  1461     CmiAssert(destPe!=CkMyPe());
  1463 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)  1467       cMsg->destPe = destPe;
  1468       CProxy_NodeProxyMgr cnp(CkpvAccess(BOCclass_group).nodeProxyMgr);
  1469       cnp[CkNodeOf(destPe)].recvImmediateResults(cMsg);
  1471       cp[destPe].recvImmediateResults(cMsg);
  1485 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR) && (CMK_SMP) && defined(NAMDSRC_IMMQD_HACK)  1491         CkpvAccess(_qd)->create();
  1501     NAMD_bug(
"ProxyMgr should receive result message on home processor");
  1508     CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);        
  1509     CmiEnableUrgentSend(1);
  1510     cp[CkMyPe()].recvResults(omsg);
  1511     CmiEnableUrgentSend(0);
  1517                 CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
  1526 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)  1530     int destRank = CkRankOf(msg->destPe);
  1531     PatchMap *pmap = localPatchMaps[destRank];
  1534 #if CMK_SMP && defined(NAMDSRC_IMMQD_HACK)  1535         msg->isFromImmMsgCall = (CkMyRank()==CkMyNodeSize());
  1537         CProxy_ProxyMgr cp(localProxyMgr);
  1538         CmiEnableUrgentSend(1);
  1539         cp[msg->destPe].recvResults(msg);
  1540         CmiEnableUrgentSend(0);
  1552             CProxy_NodeProxyMgr cnp(thisgroup);
  1555             cnp[CkNodeOf(cMsg->destPe)].recvImmediateResults(cMsg);
  1563 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)  1565         CProxy_NodeProxyMgr cnp(CkpvAccess(BOCclass_group).nodeProxyMgr);
  1566         for(
int i=0; i<pcnt-1; i++) {
  1568             cnp[pids[i]].recvImmediateProxyData(copymsg);
  1570         cnp[pids[pcnt-1]].recvImmediateProxyData(msg);
  1574   CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
  1575   cp.recvImmediateProxyData(msg,pcnt,pids);
  1581 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR) && (CMK_SMP) && defined(NAMDSRC_IMMQD_HACK)  1587         CkpvAccess(_qd)->create();
  1605 #if CMK_PERSISTENT_COMM && USE_PERSISTENT_TREE  1607         PersistentHandle *treephs = proxy->getSpanningTreePhs(ntreephs);
  1608         CmiAssert(treephs && ntreephs == npid);
  1609         CmiUsePersistentHandle(treephs, ntreephs);
  1612 #if CMK_PERSISTENT_COMM && USE_PERSISTENT_TREE  1613         CmiUsePersistentHandle(NULL, 0);
  1618       CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
  1619       cp.recvProxyData(newmsg,npid,pids);
  1624   CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
  1625   cp[CkMyPe()].recvProxyData(msg);
  1629 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)  1630     CProxy_ProxyMgr cp(localProxyMgr);
  1632     CmiAssert(ptn->
numPes!=0);
  1636     int rank = CkRankOf(ptn->
peIDs[0]);
  1637     PatchMap *pmap = localPatchMaps[rank];
  1640     int npid = ppatch->getSTNNodeChild();
  1641     int *pids = ppatch->getSTNodeChildPtr();
  1646         if(pids[npid-1]==CkMyNode()) npid--;
  1648     CProxy_NodeProxyMgr cnp(thisgroup);
  1649 #if CMK_PERSISTENT_COMM && USE_PERSISTENT_TREE  1652         PersistentHandle *treephs = ppatch->getSpanningTreePhs(ntreephs);
  1653         CmiAssert(treephs && ntreephs >= npid);
  1654         CmiUsePersistentHandle(treephs, ntreephs);
  1657     for(
int i=0; i<npid; i++) {
  1659         cnp[pids[i]].recvImmediateProxyData(copymsg);
  1661 #if CMK_PERSISTENT_COMM && USE_PERSISTENT_TREE  1662     CmiUsePersistentHandle(NULL, 0);
  1666 #if CMK_SMP && defined(NAMDSRC_IMMQD_HACK)  1667     msg->isFromImmMsgCall = (CkMyRank()==CkMyNodeSize());
  1671     CkAbort(
"Bad execution path to NodeProxyMgr::recvImmediateProxyData\n");
  1677 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)  1679         CProxy_NodeProxyMgr cnp(CkpvAccess(BOCclass_group).nodeProxyMgr);
  1680         for(
int i=0; i<pcnt-1; i++) {
  1682             cnp[pids[i]].recvImmediateProxyAll(copymsg);
  1684         cnp[pids[pcnt-1]].recvImmediateProxyAll(msg);
  1688   CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
  1689   cp.recvImmediateProxyAll(msg,pcnt,pids);
  1695 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR) && (CMK_SMP) && defined(NAMDSRC_IMMQD_HACK)  1701         CkpvAccess(_qd)->create();
  1712   #if defined(PROCTRACE_DEBUG) && defined(NAST_DEBUG)  1713   DebugFileTrace *dft = DebugFileTrace::Object();
  1715   dft->writeTrace(
"PMgr::recvImmPAll for patch[%d]\n", msg->
patch);
  1716   CmiAssert(proxy!=NULL);
  1717   dft->writeTrace(
"PMgr::recvImmPAll assertion OK for patch[%d]\n", msg->
patch);
  1728 #if CMK_PERSISTENT_COMM && USE_PERSISTENT_TREE  1730         PersistentHandle *treephs = proxy->getSpanningTreePhs(ntreephs);
  1731         CmiAssert(treephs && ntreephs == npid);
  1732         CmiUsePersistentHandle(treephs, ntreephs);
  1735 #if CMK_PERSISTENT_COMM && USE_PERSISTENT_TREE  1736         CmiUsePersistentHandle(NULL, 0);
  1741   CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
  1742   cp[CkMyPe()].recvProxyAll(msg);
  1746 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)  1747     CProxy_ProxyMgr cp(localProxyMgr);
  1749     CmiAssert(ptn->
numPes!=0);
  1750     #if defined(PROCTRACE_DEBUG) && defined(NAST_DEBUG)  1752     printf(
"NodePMgr::recvImmPAll for patch[%d] on node %d rank %d, prepare to send proc ", msg->
patch, CkMyNode(), CkMyRank());
  1753     for(
int i=0; i<ptn->
numPes; i++) {
  1754         printf(
"%d, ", ptn->
peIDs[i]);
  1762     int rank = CkRankOf(ptn->
peIDs[0]);
  1763     PatchMap *pmap = localPatchMaps[rank];
  1766     int npid = ppatch->getSTNNodeChild();
  1767     int *pids = ppatch->getSTNodeChildPtr();
  1772         if(pids[npid-1]==CkMyNode()) npid--;
  1775 #if CMK_PERSISTENT_COMM && USE_PERSISTENT_TREE  1778         PersistentHandle *treephs = ppatch->getSpanningTreePhs(ntreephs);
  1779         CmiAssert(treephs && ntreephs >= npid);
  1780         CmiUsePersistentHandle(treephs, ntreephs);
  1783     CProxy_NodeProxyMgr cnp(thisgroup);
  1784     for(
int i=0; i<npid; i++) {
  1786         cnp[pids[i]].recvImmediateProxyAll(copymsg);
  1788 #if CMK_PERSISTENT_COMM && USE_PERSISTENT_TREE  1789     CmiUsePersistentHandle(NULL, 0);
  1793 #if CMK_SMP && defined(NAMDSRC_IMMQD_HACK)  1794     msg->isFromImmMsgCall = (CkMyRank()==CkMyNodeSize());
  1798     CkAbort(
"Bad execution path to NodeProxyMgr::recvImmediateProxyData\n");
  1802 void ProxyMgr::printProxySpanningTree(){
  1803 #ifdef NODEAWARE_PROXY_SPANNINGTREE  1805     for(
int i=0; i<numPatches; i++) {
  1807         printf(
"ST tree for HomePatch[%d]: #nodes = %d\n", i, oneList.
size()); 
  1809         printf(
"===%d=== pes/node: ", i);
  1810         for(
int j=0; j<oneList.
size(); j++) {
  1811             printf(
"%d ", oneList.
item(j).numPes);
  1814         printf(
"===%d=== pe ids: ", i);
  1815         for(
int j=0; j<oneList.
size(); j++) {
  1816             for(
int k=0; k<oneList.
item(j).numPes; k++) {
  1817                 printf(
"%d ", oneList.
item(j).peIDs[k]);
  1825     for(
int i=0; i<numPatches; i++) {
  1827         printf(
"ST tree for HomePatch[%d]: #nodes = %d\n", i, oneList.
size()); 
  1829         printf(
"===%d=== pe ids: ", i);
  1830         for(
int j=0; j<oneList.
size(); j++) {            
  1831             printf(
"%d ", oneList.
item(j));            
  1840     if(proxyInfo[patchID]) {
  1841         delete proxyInfo[patchID];
  1844         proxyInfo[patchID] = NULL;
  1846         proxyInfo[patchID] = 
new proxyTreeNode(CkNodeOf(pes[0]),numPes,pes);
  1851   CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
  1853   CmiEnableUrgentSend(1);
  1854   cp[node].recvResult(msg);
  1855   CmiEnableUrgentSend(0);
  1866   CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
  1868   CmiEnableUrgentSend(1);
  1869   cp[node].recvResult(msg);
  1870   CmiEnableUrgentSend(0);
  1883         int totalPatches = 0;
  1884         int totalProxies = 0;
  1885         for(
int i=0; i<bufSize; i++) {
  1890         totalPatches += size;
  1891         for(
int i=0; i<size; i++) {
  1896         int msgPatchIdx = 0;
  1897         int msgProxyPeIdx = 0;
  1898         for(
int i=0; i<bufSize; i++) {
  1905                         memcpy(msg->
proxyPEs+msgProxyPeIdx, one->
proxyPEs+curPeIdx, 
sizeof(
int)*curListLen);
  1906                         curPeIdx += curListLen;
  1907                         msgProxyPeIdx += curListLen;
  1910         for(
int i=0; i<size; i++) {
  1915                 msgProxyPeIdx += curListLen;
  1920 #define HOMEPATCH_TREE_BRFACTOR 2  1923         std::vector<int> nodesWithPatches; 
  1925         for(
int nodeId=0; nodeId<CkNumNodes(); ++nodeId) {
  1927                 int firstPe = CkNodeFirst(nodeId);
  1928                 int endPe = firstPe + CkNodeSize(nodeId);
  1929                 for(
int pe=firstPe; pe < endPe; ++pe) {
  1932                 if(hpCnt==0) 
continue;
  1934                 nodesWithPatches.push_back(nodeId);
  1935                 if(CkMyNode() == nodeId) {
  1937                         myNodeIdx = nodesWithPatches.size()-1;
  1938                         numHomePatches = hpCnt;
  1939                         homepatchRecved = 0;
  1955         if(myNodeIdx == 0) {
  1959                 parentNode = nodesWithPatches[parentIdx];
  1964         int totalNodes = nodesWithPatches.size();
  1967                 if(kidId >= totalNodes) 
break;
  1970         if(numKidNodes!=0) {
  1980         CmiLock(localDepositLock);
  1981         insertIdx = homepatchRecved++; 
  1983         localProxyLists[insertIdx].
patchID = pid;
  1984         localProxyLists[insertIdx].
numProxies = size;
  1985         localProxyLists[insertIdx].
proxyList = plist;
  1987         if(insertIdx == (numHomePatches-1)) {
  1991         CmiUnlock(localDepositLock);
  1996         CmiLock(localDepositLock);
  1997         insertIdx = kidRecved++;
  1999         remoteProxyLists[insertIdx] = msg;
  2000         if(insertIdx == (numKidNodes-1)) {
  2004         CmiUnlock(localDepositLock);
  2008         if(homepatchRecved!=numHomePatches || kidRecved != numKidNodes) 
return;
  2010         homepatchRecved = 0;
  2014         if(parentNode == -1) {
  2016                 CProxy_ProxyMgr cp(CkpvAccess(BOCclass_group).proxyMgr);
  2017                 cp[0].recvPatchProxyInfo(msg);
  2019                 CProxy_NodeProxyMgr cnp(thisgroup);
  2020                 cnp[parentNode].sendProxyListInfo(msg);
  2022         for(
int i=0; i<numKidNodes; i++) {
  2023                 delete remoteProxyLists[i];
  2027 #include "ProxyMgr.def.h" Elem * find(const Elem &elem)
 
void copy(ResizeArray< Elem > &ra)
 
static void * pack(ProxyResultMsg *msg)
 
std::ostream & iINFO(std::ostream &s)
 
static ProxyResultVarsizeMsg * getANewMsg(NodeID nid, PatchID pid, int prioSize, ForceList *fls)
 
void recvImmediateResults(ProxyCombinedResultRawMsg *)
 
void registerProxy(RegisterProxyMsg *)
 
void recvSpanningTree(ProxySpanningTreeMsg *)
 
void sendSpanningTreeToHomePatch(int pid, int *tree, int n)
 
void recvNodeAwareSTParent(int patch, int parent)
 
static ProxyMgr * Object()
 
void recvProxyAll(ProxyDataMsg *)
 
int flLen[Results::maxNumForces]
 
void createSTForHomePatches(PatchMap *pmap)
 
static PatchMap * Object()
 
void sendProxies(int pid, int *list, int n)
 
void buildProxySpanningTree2()
 
void recvImmediateResults(ProxyCombinedResultRawMsg *)
 
void sendNodeAwareSpanningTree(ProxyNodeAwareSpanningTreeMsg *)
 
void basePatchIDList(int pe, PatchIDList &)
 
void recvNodeAwareSpanningTree(ProxyNodeAwareSpanningTreeMsg *msg)
 
static ProxyCombinedResultMsg * fromRaw(ProxyCombinedResultRawMsg *msg)
 
HomePatchList * homePatchList()
 
static void processCpuLoad()
 
std::ostream & endi(std::ostream &s)
 
void receiveAll(ProxyDataMsg *)
 
int upstreamNeighbors(int pid, PatchID *neighbor_ids)
 
void receiveResults(ProxyResultVarsizeMsg *msg)
 
void recvRegisterProxy(RegisterProxyMsg *)
 
ResizeArrayIter< T > begin(void) const
 
int add(const Elem &elem)
 
int getSpanningTreeNChild(void)
 
static double averageLoad
 
Patch * patch(PatchID pid)
 
HomePatch * homePatch(PatchID pid)
 
int add(const Elem &elem)
 
static void outputProxyTree(ProxyTree &ptree, int np)
 
static ProxyCombinedResultRawMsg * toRaw(ProxyCombinedResultMsg *msg)
 
UniqueSetIter< T > begin(void) const
 
void setall(const Elem &elem)
 
void unregisterPatch(PatchID pid, HomePatch *pptr)
 
static ProxyNodeAwareSpanningTreeMsg * getANewMsg(PatchID pid, NodeID nid, proxyTreeNode *tree, int size)
 
void unregisterProxy(PatchID pid)
 
int numPatches(void) const
 
void buildProxySpanningTree()
 
void unregisterProxy(UnregisterProxyMsg *)
 
ProxyCombinedResultMsg * depositCombinedResultRawMsg(ProxyCombinedResultRawMsg *)
 
void NAMD_bug(const char *err_msg)
 
ComputeType type(ComputeID cid)
 
void recvImmediateProxyAll(ProxyDataMsg *)
 
void recvNodeAwareSpanningTreeOnHomePatch(ProxyNodeAwareSpanningTreeMsg *msg)
 
void removeUnusedProxies(void)
 
void receiveData(ProxyDataMsg *)
 
void homePatchIDList(PatchIDList &)
 
void recvResult(ProxyGBISP1ResultMsg *)
 
void recvProxies(int pid, int *list, int n)
 
void recvProxyData(ProxyDataMsg *)
 
static PatchProxyListMsg * createPatchProxyListMsg(PatchProxyListMsg **bufs, int bufSize, ProxyListInfo *info, int size)
 
void recvSpanningTree(int *t, int n)
 
void recvData(ProxyGBISP2DataMsg *)
 
void recvSpanningTreeOnHomePatch(int pid, int *tree, int n)
 
PatchID getPatchID() const
 
void buildSpanningTree0()
 
#define ALLOCA(TYPE, NAME, SIZE)
 
void createProxy(PatchID pid)
 
void recvImmediateProxyAll(ProxyDataMsg *msg)
 
UniqueSetIter< T > end(void) const
 
ForceList * forceList[Results::maxNumForces]
 
#define HOMEPATCH_TREE_BRFACTOR
 
void recvResults(ProxyResultVarsizeMsg *)
 
void setSpanningTree(int, int *, int)
 
void sendProxyData(ProxyDataMsg *, int, int *)
 
void sendSpanningTree(ProxySpanningTreeMsg *)
 
void recvImmediateProxyData(ProxyDataMsg *)
 
ProxyCombinedResultMsg * depositCombinedResultMsg(ProxyCombinedResultMsg *)
 
#define PACK_RESIZE(DATA)
 
static int noInterNode(int p)
 
void sendNodeAwareSpanningTreeToHomePatch(int pid, proxyTreeNode *tree, int n)
 
void sendProxyList(int pid, int *plist, int size)
 
void sendProxyAll(ProxyDataMsg *, int, int *)
 
ForceList * forceList[Results::maxNumForces]
 
static ComputeMap * Object()
 
void registerProxy(PatchID pid)
 
void sendResults(ProxyResultVarsizeMsg *)
 
int find(const Elem &e) const
 
int flLen[Results::maxNumForces]
 
int getSpanningTreeParent()
 
void setProxyTreeBranchFactor(int dim)
 
const int * getSpanningTreeChildPtr()
 
void buildSpanningTree(void)
 
int numPids(ComputeID cid)
 
int numPatchesOnNode(int node)
 
static ProxyResultMsg * unpack(void *ptr)
 
void receiveResult(ProxyGBISP1ResultMsg *msg)
 
int pid(ComputeID cid, int i)
 
static int compLoad(const void *a, const void *b)
 
void sendResult(ProxyGBISP1ResultMsg *)
 
PACK_MSG(ProxySpanningTreeMsg, PACK(patch);PACK(node);PACK_RESIZE(tree);)
 
void registerPatch(PatchID pid, HomePatch *pptr)
 
void registerPatch(int patchID, int numPes, int *pes)
 
void swap(ResizeArray< Elem > &ra)
 
int del(const Elem &elem)
 
void recvNodeAwareSpanningTree(ProxyNodeAwareSpanningTreeMsg *)
 
void recvPatchProxyInfo(PatchProxyListMsg *msg)
 
void removeProxy(PatchID pid)
 
ResizeArrayIter< T > end(void) const
 
void contributeToParent()
 
void sendProxyListInfo(PatchProxyListMsg *msg)
 
PatchProxyListMsg(int num)
 
void recvUnregisterProxy(UnregisterProxyMsg *)
 
void recvImmediateProxyData(ProxyDataMsg *msg)