27 #include "WorkDistrib.decl.h"    31 #include "main.decl.h"    51 #include "TopoManager.h"    56 #if defined(NAMD_CUDA) || defined(NAMD_HIP)    58 #define __thread __declspec(thread)    64 #define MIN_DEBUG_LEVEL 2    66 #ifdef MEM_OPT_VERSION    94   randtopo = CmiGetArgFlag(argv, 
"+randtopo");
    95   if ( CkMyPe() >= CkNumPes() ) 
return;
    96 #if CCD_COND_FN_EXISTS    97   CcdCallOnCondition(CcdTOPOLOGY_AVAIL, (CcdCondFn)
build_ordering, (
void*)0);
    99   CcdCallOnCondition(CcdTOPOLOGY_AVAIL, (CcdVoidFn)
build_ordering, (
void*)0);
   110   CkpvAccess(BOCclass_group).workDistrib = thisgroup;
   111   patchMapArrived = 
false;
   112   computeMapArrived = 
false;
   115 #define MACHINE_PROGRESS   117 #define MACHINE_PROGRESS { traceUserEvent(eventMachineProgress);  CmiMachineProgressImpl(); }   118   if ( CkMyNodeSize() > 1 ) 
NAMD_bug(
"CkMyNodeSize() > 1 for non-smp build");
   130   if ( d ) 
while ( ! (d & c) ) {
   133   return (a & c) - (b & c);
   139   if ( d ) 
while ( ! (d & c) ) {
   150     if ( c < 0 ) 
return true;
   151     if ( c > 0 ) 
return false;
   154     if ( c < 0 ) 
return true;
   155     if ( c > 0 ) 
return false;
   167 #if defined(NAMD_CUDA) || defined(NAMD_HIP)   175 #if defined(NAMD_CUDA) || defined(NAMD_HIP)   189   const int numPhys = CmiNumPhysicalNodes();
   190   const int numNode = CmiNumNodes();
   191   const int numPe = CmiNumPes();
   201   for ( 
int ph=0; ph<numPhys; ++ph ) {
   203     CmiGetPesOnPhysicalNode(ph, &pes, &npes);
   204     for ( 
int i=0; i<npes; ++i, ++k ) {
   207     numNodeInPhys[ph] = 0;
   208     for ( 
int i=0, j=0; i<npes; i += CmiNodeSize(CmiNodeOf(pes[i])), ++j ) {
   209       rankInPhysOfNode[CmiNodeOf(pes[i])] = j;
   210       numNodeInPhys[ph] += 1;
   215     if ( ! CkMyNode() ) {
   216       iout << 
iWARN << 
"RANDOMIZING PHYSICAL NODE ORDERING\n" << 
endi;
   219     for ( 
int j=0; j<numPhys; ++j ) {
   220       randPhysOrder[j] = j;
   223     for ( 
int j=0, k=0; j<numPhys; ++j ) {
   224       const int ph = randPhysOrder[j];
   226       CmiGetPesOnPhysicalNode(ph, &pes, &npes);
   227       for ( 
int i=0; i<npes; ++i, ++k ) {
   233   for ( 
int i=0; i<numPe; ++i ) {
   239   for ( 
int i=0; i<numPe; ++i ) {
   244   if ( 0 && CmiMyNode() == 0 ) 
for ( 
int i=0; i<numPe; ++i ) {
   245     CkPrintf(
"order %5d %5d %5d %5d %5d\n", i,
   276     int x_begin, 
int x_end, 
int y_begin, 
int y_end,
   278     int *result, 
int ydim
   280   int x_len = x_end - x_begin;
   281   int y_len = y_end - y_begin;
   282   if ( x_len == 1 && y_len == 1 ) {
   284     if ( 0 ) CkPrintf(
"pme %5d %5d on pe %5d at %f %f\n", x_begin, y_begin, *pe_begin,
   285       coord[*pe_begin].x, coord[*pe_begin].y);
   286     result[x_begin*ydim + y_begin] = *pe_begin;
   289   int *pe_end = pe_begin + x_len * y_len;
   290   if ( x_len >= y_len ) {
   292     int x_split = x_begin + x_len / 2;
   293     int* pe_split = pe_begin + (x_split - x_begin) * y_len;
   299     int y_split = y_begin + y_len / 2;
   300     int* pe_split = pe_begin + (y_split - y_begin) * x_len;
   308   int numpes = CkNumPes();
   312   for ( 
int i=0; i<numpes; ++i ) {
   318   for ( 
int i=0, npatches=patchMap->
numPatches(); i<npatches; ++i ) {
   319     int pe = patchMap->
node(i);
   321     sumPos[pe] += patchMap->
center(i);
   323   const int npmepes = xdim*ydim;
   325   for ( 
int i=0; i<npmepes; ++i ) {
   326     int pe = sortpes[i] = pmepes[i];
   331       int node = CkNodeOf(pe);
   332       int nsize = CkNodeSize(node);
   333       int pe2 = CkNodeFirst(node);
   334       for ( 
int j=0; j<nsize; ++j, ++pe2 )  {
   341       int node = CmiPhysicalNodeID(pe);
   343       CmiGetPesOnPhysicalNode(node, &nlist, &nsize);
   344       for ( 
int j=0; j<nsize; ++j )  {
   351       avgPos[pe] = sum / cnt;
   361   saveComputeMapReturnEP = ep;
   362   saveComputeMapReturnChareID = chareID;
   365   CProxy_WorkDistrib(thisgroup).recvComputeMapChanges(mapMsg);
   390     for (i=0; i<nc; i++) {
   391       int data = computeMap->
newNode(i);
   395     for (i=0; i<nc; i++) {
   403   } 
else if ( ! CkMyRank() ) { 
   407     if ( i != nc ) 
NAMD_bug(
"WorkDistrib::recvComputeMapChanges check 1 failed\n");
   408     for (i=0; i<nc; i++) {
   414     if ( i != nc ) 
NAMD_bug(
"WorkDistrib::recvComputeMapChanges check 2 failed\n");
   415     for (i=0; i<nc; i++) {
   421     if ( i != nc ) 
NAMD_bug(
"WorkDistrib::recvComputeMapChanges check 3 failed\n");
   426   CkCallback cb(CkIndex_WorkDistrib::doneSaveComputeMap(NULL), 0, thisgroup);
   427   contribute(0, NULL, CkReduction::random, cb);
   433   CkSendMsgBranch(saveComputeMapReturnEP, CkAllocMsg(0,0,0), 0, saveComputeMapReturnChareID);
   436 #ifdef MEM_OPT_VERSION   441 void WorkDistrib::fillAtomListForOnePatch(
int pid, 
FullAtomList &alist){
   445                           0.5*(patchMap->
min_b(pid)+patchMap->
max_b(pid)),
   446                           0.5*(patchMap->
min_c(pid)+patchMap->
max_c(pid)));
   448     int n = alist.
size();
   464     for(
int j=0; j < n; j++)
   471       if ( a[j].migrationGroupSize ) {
   472        if ( a[j].migrationGroupSize != a[j].hydrogenGroupSize ) {
   477             for ( 
int k=a[j].hydrogenGroupSize; k<mgs;
   485             pos = lattice.
nearest(pos,center,&mother_transform);
   489         a[j].
position = lattice.
nearest(a[j].position, center, &(a[j].transform));
   518         }
else if ((a[j].status & 
DrudeAtom)!=0) {
   533     for(
int j=0; j < n; j+=size) {
   536         NAMD_bug(
"Mother atom with hydrogenGroupSize of 0!");
   539       for (
int k = 0; k < size; ++k ) {
   540         allfixed = ( allfixed && (a[j+k].
atomFixed) );
   542       for (
int k = 0; k < size; ++k ) {
   548       if (a[j].rigidBondLength > 0) {
   549         if (size != wathgsize) {
   552               "Water molecule starting with atom %d contains %d atoms "   553               "but the specified water model requires %d atoms.\n",
   554               a[j].
id+1, size, wathgsize
   559         for (
int k = 0;  k < size;  k++) {
   560           anyfixed += ( fixedAtomsOn && a[j+k].
atomFixed );
   562         if (useSettle && !anyfixed) {
   563           for (
int k = 0;  k < size;  k++) {
   572       int numAtomsInPatch = n;
   573       int numFixedAtomsInPatch = 0;
   574       int numAtomsInFixedGroupsInPatch = 0;
   575       for(
int j=0; j < n; j++) {
   576         numFixedAtomsInPatch += ( a[j].
atomFixed ? 1 : 0 );
   577         numAtomsInFixedGroupsInPatch += ( a[j].
groupFixed ? 1 : 0 );
   579       iout << 
"PATCH_DETAILS:"   580            << 
" on proc " << CkMyPe()
   581            << 
" patch " << patchId
   582            << 
" atoms " << numAtomsInPatch
   583            << 
" fixed_atoms " << numFixedAtomsInPatch
   584            << 
" fixed_groups " << numAtomsInFixedGroupsInPatch
   599   int lesReduceTemp = lesOn && 
simParams->lesReduceTemp;
   604   int totalAtoms = inAtoms.
size();
   605   for(i=0;i<totalAtoms;i++)
   607     Real atomMs=inAtoms[i].mass;
   619       kbToverM = sqrt(kbT * 1.0 / atomMs);
   621     for (randnum=0.0, j=0; j<12; j++)
   623       randnum += vel_random.uniform();
   628     inAtoms[i].velocity.x = randnum*kbToverM;
   630     for (randnum=0.0, j=0; j<12; j++)
   632       randnum += vel_random.uniform();
   637     inAtoms[i].velocity.y = randnum*kbToverM;
   639     for (randnum=0.0, j=0; j<12; j++)
   641       randnum += vel_random.uniform();
   646     inAtoms[i].velocity.z = randnum*kbToverM;
   658   CProxy_Node nd(CkpvAccess(BOCclass_group).node);
   659   Node *node = nd.ckLocalBranch();
   661   CProxy_PatchMgr pm(CkpvAccess(BOCclass_group).patchMgr);
   662   PatchMgr *patchMgr = pm.ckLocalBranch();
   675     read_binary_file((std::string(basename)+
".coor").c_str(), positions, numAtoms);
   676     read_binary_file((std::string(basename)+
".vel").c_str(), velocities, numAtoms);
   678     PDB coorpdb((std::string(basename)+
".coor").c_str());
   680       NAMD_die(
"Incorrect atom count in coordinate pdb file");
   683     velocities_from_PDB((std::string(basename)+
".vel").c_str(), velocities, numAtoms);
   694     if (current == NULL) {
   700       velocities_from_PDB(current->
data, velocities, numAtoms);
   703       velocities_from_binfile(current->
data, velocities, numAtoms);
   708     random_velocities(params->
initialTemp, molecule, velocities, numAtoms);
   714     remove_com_motion(velocities, molecule, numAtoms);
   723       for ( i=0; i < numAtoms; i++ ) {
   725         if ( ! h.
isMP ) 
continue;
   733       for ( i=0; i < sortAtoms.
size(); i++ ) {
   736       int *breaks = 
new int[numPatches];
   738                         sortAtoms.
size(),numAtoms,
   744       for ( 
int pid = 0; pid < numPatches; ++pid ) {
   745         int iend = breaks[pid];
   746         for ( ; i<iend; ++i ) {
   755           for ( 
int k=0; k<mgs; ++k ) {
   775 CkPrintf(
"patch %d (%d %d %d) has %d atoms\n",
   785     for(i=0; i < numAtoms; i++)
   814   delete [] velocities;
   816   for(i=0; i < numPatches; i++)
   822     int n = atoms[i].
size();
   844       if ( a[j].migrationGroupSize ) {
   845        if ( a[j].migrationGroupSize != a[j].hydrogenGroupSize ) {
   849             for ( 
int k=a[j].hydrogenGroupSize; k<mgs;
   856             pos = lattice.
nearest(pos,center,&mother_transform);
   861                 a[j].position, center, &(a[j].transform));
   874         const int index = a[j].
vdwType;
   876         float sigma, epsilon, sigma14, epsilon14;
   877         molecule->params->
get_vdw_params(&sigma, &epsilon, &sigma14, &epsilon14, index);
   878         a[j].
dispcoef = 2*sigma*sigma*sigma*sqrt(scaling * epsilon);
   885       if ( alchOn || lesOn || pairInteractionOn || pressureProfileTypes) {
   896     int size, allfixed, k;
   897     for(j=0; j < n; j+=size) {
   900         NAMD_bug(
"Mother atom with hydrogenGroupSize of 0!");
   903       for ( k = 0; k < size; ++k ) {
   904         allfixed = ( allfixed && (a[j+k].
atomFixed) );
   906       for ( k = 0; k < size; ++k ) {
   912       if (a[j].rigidBondLength > 0) {
   913         for (k = 0;  k < size;  k++) {
   926     for(
int j=0; j < n; j+=size) {
   929         NAMD_bug(
"Mother atom with hydrogenGroupSize of 0!");
   932       for (
int k = 0; k < size; ++k ) {
   933         allfixed = ( allfixed && (a[j+k].
atomFixed) );
   935       for (
int k = 0; k < size; ++k ) {
   941       if (a[j].rigidBondLength > 0) {
   942         if (size != wathgsize) {
   945               "Water molecule starting with atom %d contains %d atoms "   946               "but the specified water model requires %d atoms.\n",
   947               a[j].
id+1, size, wathgsize
   952         for (
int k = 0;  k < size;  k++) {
   953           anyfixed += ( fixedAtomsOn && a[j+k].
atomFixed );
   955         if (useSettle && !anyfixed) {
   956           for (
int k = 0;  k < size;  k++) {
   966       int numAtomsInPatch = n;
   967       int numFixedAtomsInPatch = 0;
   968       int numAtomsInFixedGroupsInPatch = 0;
   969       for(j=0; j < n; j++) {
   970         numFixedAtomsInPatch += ( a[j].
atomFixed ? 1 : 0 );
   971         numAtomsInFixedGroupsInPatch += ( a[j].
groupFixed ? 1 : 0 );
   973       iout << 
"PATCH_DETAILS:"   974            << 
" patch " << patchId
   975            << 
" atoms " << numAtomsInPatch
   976            << 
" fixed_atoms " << numFixedAtomsInPatch
   977            << 
" fixed_groups " << numAtomsInFixedGroupsInPatch
   993   CProxy_PatchMgr pm(CkpvAccess(BOCclass_group).patchMgr);
   994   PatchMgr *patchMgr = pm.ckLocalBranch();
  1000 #ifdef MEM_OPT_VERSION  1010   for(i=0; i < numPatches; i++) {
  1011     int numAtoms = atoms[i].
size();
  1012     if ( numAtoms > maxAtoms ) { maxAtoms = numAtoms; maxPatch = i; }
  1014   iout << 
iINFO << 
"LARGEST PATCH (" << maxPatch <<
  1015         ") HAS " << maxAtoms << 
" ATOMS\n" << 
endi;
  1017 #ifdef SHOW_HISTOGRAM_HGROUP_SIZES  1019   int hgroupsize[9] = { 0 };
  1022   int maxhgroupsize = 0;
  1023   for (i = 0;  i < numPatches;  i++) {
  1025     int numAtoms = a.
size();
  1027     for (
int j = 0;  j < numAtoms;  j += hgs) {
  1028       hgs = a[j].hydrogenGroupSize;
  1029       int histndx = ( hgs > 8 ? 8 : hgs );
  1030       hgroupsize[ histndx ]++;
  1032       if (a[j].
isWater) numwaters++;
  1033       if (maxhgroupsize < hgs) maxhgroupsize = hgs;
  1036   int hgslast = ( maxhgroupsize > 8 ? 8 : maxhgroupsize );
  1037   printf(
"Number of hydrogen groups:           %7d\n", numhgroups);
  1038   printf(
"Number of settle water molecules:    %7d\n", numwaters);
  1039   printf(
"Number of remaining hydrogen groups: %7d\n", numhgroups - numwaters);
  1040   printf(
"Largest hydrogen group size:         %7d\n", maxhgroupsize);
  1041   printf(
"Histogram of hydrogen group sizes:\n");
  1043   for (i = 0;  i <= hgslast;  i++) {
  1044     printf(
"     size %d     count %d\n", i, hgroupsize[i]);
  1045     hgstotal += hgroupsize[i];
  1047   printf(
"Checksum over hydrogen group sizes:  %7d\n", hgstotal);
  1050   for(i=0; i < numPatches; i++)
  1052     if ( ! ( i % 100 ) )
  1054       DebugM(3,
"Created " << i << 
" patches so far.\n");
  1065   CProxy_Node nd(CkpvAccess(BOCclass_group).node);
  1066   Node *node = nd.ckLocalBranch();
  1067   CProxy_PatchMgr pm(CkpvAccess(BOCclass_group).patchMgr);
  1068   PatchMgr *patchMgr = pm.ckLocalBranch();
  1075     if (patchMap->
node(i) != node->
myid() )
  1077       DebugM(3,
"patchMgr->movePatch("  1078         << i << 
"," << patchMap->
node(i) << 
")\n");
  1088   CProxy_PatchMgr pm(CkpvAccess(BOCclass_group).patchMgr);
  1089   PatchMgr *patchMgr = pm.ckLocalBranch();
  1095   for(
int i=0; i < numPatches; i++) {
  1113   if ( CkNumPes() == 1 ) {
  1114     patchMapArrived = 
true;
  1119   CProxy_Node nd(CkpvAccess(BOCclass_group).node);
  1120   Node *node = nd.ckLocalBranch();
  1123 #ifdef NODEAWARE_PROXY_SPANNINGTREE 
  1124       || CkNumPes() > CkNumNodes()
  1125       ) && ( CkNumNodes() > 1
  1130 #ifdef NODEAWARE_PROXY_SPANNINGTREE   1131   if ( CkNumPes() > CkNumNodes() && CkNumNodes() > 1
  1142   CProxy_WorkDistrib workProxy(thisgroup);
  1143   workProxy[0].savePatchMap(mapMsg);
  1155   if ( CkMyRank() ) patchMapArrived = 
true;
  1157   if ( patchMapArrived && CkMyPe() ) {
  1161     CProxy_Node nd(CkpvAccess(BOCclass_group).node);
  1162     Node *node = nd.ckLocalBranch();
  1165 #ifdef NODEAWARE_PROXY_SPANNINGTREE 
  1166         || CkNumPes() > CkNumNodes()
  1167         ) && ( CkNumNodes() > 1
  1172 #ifdef NODEAWARE_PROXY_SPANNINGTREE   1173     if ( CkNumPes() > CkNumNodes() && CkNumNodes() > 1
  1179   if ( patchMapArrived ) {
  1180     if ( CkMyRank() + 1 < CkNodeSize(CkMyNode()) ) {
  1181       ((CProxy_WorkDistrib(thisgroup))[CkMyPe()+1]).
savePatchMap(msg);
  1188   patchMapArrived = 
true;
  1190   int self = CkMyNode();
  1191   int range_begin = 0;
  1192   int range_end = CkNumNodes();
  1193   while ( 
self != range_begin ) {
  1195     int split = range_begin + ( range_end - range_begin ) / 2;
  1197     else { range_begin = 
split; }
  1199   int send_near = 
self + 1;
  1200   int send_far = send_near + ( range_end - send_near ) / 2;
  1204   if ( send_far < range_end ) pids[npid++] = CkNodeFirst(send_far);
  1205   if ( send_near < send_far ) pids[npid++] = CkNodeFirst(send_near);
  1206   pids[npid++] = CkMyPe();  
  1207   CProxy_WorkDistrib(thisgroup).savePatchMap(msg,npid,pids);
  1213   if ( CkMyRank() ) 
return;
  1215   if ( CkNumNodes() == 1 ) {
  1216     computeMapArrived = 
true;
  1226   } 
else if ( ! CkMyRank() ) { 
  1232   computeMapArrived = 
true;
  1241   CProxy_Node nd(CkpvAccess(BOCclass_group).node);
  1242   Node *node = nd.ckLocalBranch();
  1248 #ifndef MEM_OPT_VERSION  1256   double maxNumPatches = 1.e9;  
  1260   DebugM(3,
"Mapping patches\n");
  1261   if ( lattice.
a_p() && lattice.
b_p() && lattice.
c_p() ) {
  1262     xmin = 0.;  xmax = 0.;
  1274     printf(
"+++ center=%.4f %.4f %.4f\n",
  1276     printf(
"+++ xmin=%.4f  xmax=%.4f\n", xmin.
x, xmax.
x);
  1277     printf(
"+++ ymin=%.4f  ymax=%.4f\n", xmin.
y, xmax.
y);
  1278     printf(
"+++ zmin=%.4f  zmax=%.4f\n", xmin.
z, xmax.
z);
  1290     iout << 
iINFO << 
"ORIGINAL ATOMS MINMAX IS " << xmin << 
"  " << xmax << 
"\n" << 
endi;
  1291     double frac = ( (double)totalAtoms - 10000. ) / (double)totalAtoms;
  1292     if ( frac < 0.9 ) { frac = 0.9; }
  1295     iout << 
iINFO << 
"ADJUSTED ATOMS MINMAX IS " << xmin << 
"  " << xmax << 
"\n" << 
endi;
  1300   origin_shift = lattice.
a_r() * lattice.
origin();
  1301   xmin.
x -= origin_shift;
  1302   xmax.
x -= origin_shift;
  1303   origin_shift = lattice.
b_r() * lattice.
origin();
  1304   xmin.
y -= origin_shift;
  1305   xmax.
y -= origin_shift;
  1306   origin_shift = lattice.
c_r() * lattice.
origin();
  1307   xmin.
z -= origin_shift;
  1308   xmax.
z -= origin_shift;
  1317   if (params->
LCPOOn && patchSize < 32.4) {
  1318     if ( twoAwayX > 0 || twoAwayY > 0 || twoAwayZ > 0 ) {
  1319       iout << 
iWARN << 
"Ignoring twoAway[XYZ] due to LCPO SASA implementation.\n" << 
endi;
  1321     twoAwayX = twoAwayY = twoAwayZ = 0;
  1325   if ( twoAwayX > 0 ) maxNumPatches = 1.e9;
  1326   if ( twoAwayY > 0 ) maxNumPatches = 1.e9;
  1327   if ( twoAwayZ > 0 ) maxNumPatches = 1.e9;
  1330       iout << 
iINFO << 
"LIMITING NUMBER OF PATCHES TO " <<
  1331                                 maxNumPatches << 
"\n" << 
endi;
  1334   int numpes = CkNumPes();
  1338     delete [] patchMap->nPatchesOnNode;
  1339     patchMap->nPatchesOnNode = 
new int[numpes];
  1340     memset(patchMap->nPatchesOnNode, 0, numpes*
sizeof(
int));    
  1343 #if defined(NAMD_CUDA) || defined(NAMD_HIP) || defined(NAMD_MIC)  1346   int numPatches = patchMap->
sizeGrid(
  1348         twoAwayX>0 ? 2 : 1, twoAwayY>0 ? 2 : 1, twoAwayZ>0 ? 2 : 1);
  1349   if ( numPatches < numpes && twoAwayX < 0 ) {
  1353         twoAwayX>0 ? 2 : 1, twoAwayY>0 ? 2 : 1, twoAwayZ>0 ? 2 : 1);
  1355   if ( numPatches < numpes && twoAwayY < 0 ) {
  1359         twoAwayX>0 ? 2 : 1, twoAwayY>0 ? 2 : 1, twoAwayZ>0 ? 2 : 1);
  1361   if ( numPatches < numpes && twoAwayZ < 0 ) {
  1365         twoAwayX>0 ? 2 : 1, twoAwayY>0 ? 2 : 1, twoAwayZ>0 ? 2 : 1);
  1367   if ( numPatches < numpes ) {
  1368     #if defined(NAMD_MIC)  1369     NAMD_die(
"MIC-enabled NAMD requires at least one patch per thread.");
  1372       NAMD_die(
"GPU-resident NAMD requires at least one patch per thread.");
  1376   if ( numPatches % numpes && numPatches <= 1.4 * numpes ) {
  1377     int exactFit = numPatches - numPatches % numpes;
  1378     int newNumPatches = patchMap->
sizeGrid(
  1380         twoAwayX>0 ? 2 : 1, twoAwayY>0 ? 2 : 1, twoAwayZ>0 ? 2 : 1);
  1381     if ( newNumPatches == exactFit ) {
  1382       iout << 
iINFO << 
"REDUCING NUMBER OF PATCHES TO IMPROVE LOAD BALANCE\n" << 
endi;
  1383       maxNumPatches = exactFit;
  1387   patchMap->
makePatches(xmin,xmax,lattice,patchSize,maxNumPatches,
  1389         twoAwayX>0 ? 2 : 1, twoAwayY>0 ? 2 : 1, twoAwayZ>0 ? 2 : 1);
  1393   int availPes = numpes;
  1399 #ifdef MEM_OPT_VERSION  1412   int numPatches = patchMap->
sizeGrid(
  1414         twoAwayX ? 2 : 1, twoAwayY ? 2 : 1, twoAwayZ ? 2 : 1);
  1415   if ( ( numPatches > (0.3*availPes) || numPatches > maxNumPatches
  1416        ) && twoAwayZ < 0 ) {
  1420         twoAwayX ? 2 : 1, twoAwayY ? 2 : 1, twoAwayZ ? 2 : 1);
  1422   if ( ( numPatches > (0.6*availPes) || numPatches > maxNumPatches
  1423        ) && twoAwayY < 0 ) {
  1427         twoAwayX ? 2 : 1, twoAwayY ? 2 : 1, twoAwayZ ? 2 : 1);
  1429   if ( ( numPatches > availPes || numPatches > maxNumPatches
  1430        ) && twoAwayX < 0 ) {
  1434         twoAwayX ? 2 : 1, twoAwayY ? 2 : 1, twoAwayZ ? 2 : 1);
  1436   if ( numPatches > availPes && numPatches <= (1.4*availPes) && availPes <= maxNumPatches ) {
  1437     int newNumPatches = patchMap->
sizeGrid(
  1439         twoAwayX ? 2 : 1, twoAwayY ? 2 : 1, twoAwayZ ? 2 : 1);
  1440     if ( newNumPatches <= availPes && numPatches <= (1.4*newNumPatches) ) {
  1441       iout << 
iINFO << 
"REDUCING NUMBER OF PATCHES TO IMPROVE LOAD BALANCE\n" << 
endi;
  1442       maxNumPatches = availPes;
  1446   patchMap->
makePatches(xmin,xmax,lattice,patchSize,maxNumPatches,
  1448         twoAwayX ? 2 : 1, twoAwayY ? 2 : 1, twoAwayZ ? 2 : 1);
  1465 #if (CMK_BLUEGENEP | CMK_BLUEGENEL) && USE_TOPOMAP   1467   int numPes = tmgr.getDimNX() * tmgr.getDimNY() * tmgr.getDimNZ();
  1468   if (numPes > patchMap->
numPatches() && (assignPatchesTopoGridRecBisection() > 0)) {
  1469     CkPrintf (
"Blue Gene/L topology partitioner finished successfully \n");
  1473   assignPatchesSpaceFillingCurve();       
  1475   int *nAtoms = 
new int[nNodes];
  1478   for(i=0; i < nNodes; i++)
  1488 #ifdef MEM_OPT_VERSION  1489       numAtoms += patchMap->numAtoms(i);
  1490       nAtoms[patchMap->
node(i)] += patchMap->numAtoms(i);         
  1492     if (patchMap->
patch(i)) {
  1499   if ( numAtoms != 
Node::Object()->molecule->numAtoms ) {
  1500     for(i=0; i < nNodes; i++)
  1501       iout << 
iINFO << nAtoms[i] << 
" atoms assigned to node " << i << 
"\n" << 
endi;
  1503     NAMD_die(
"Incorrect atom count in WorkDistrib::assignNodeToPatch\n");
  1545 void WorkDistrib::assignPatchesToLowestLoadNode() 
  1548   int assignedNode = 0;
  1550   CProxy_Node nd(CkpvAccess(BOCclass_group).node);
  1551   Node *node = nd.ckLocalBranch();
  1558   int *load = 
new int[ncpus];
  1559   int *assignedNodes = 
new int[patchMap->
numPatches()];
  1560   for (
int i=0; i<ncpus; i++) {
  1563   CkPrintf(
"assignPatchesToLowestLoadNode\n");
  1564   int defaultNode = 0;
  1565   if ( 
simParams->noPatchesOnZero && ncpus > 1 ){
  1567     if( 
simParams->noPatchesOnOne && ncpus > 2)
  1571   for(pid=0; pid < patchMap->
numPatches(); pid++) {
  1572     assignedNode = defaultNode;
  1573     for (
int i=assignedNode + 1; i < ncpus; i++) {
  1574       if (load[i] < load[assignedNode]) assignedNode = i;
  1576     assignedNodes[pid] = assignedNode;
  1577 #ifdef MEM_OPT_VERSION  1578     load[assignedNode] += patchMap->numAtoms(pid) + 1;
  1585   sortNodesAndAssign(assignedNodes);
  1586   delete[] assignedNodes;
  1590 void WorkDistrib::assignPatchesBitReversal() 
  1594   CProxy_Node nd(CkpvAccess(BOCclass_group).node);
  1595   Node *node = nd.ckLocalBranch();
  1603   if ( ncpus <= npatches )
  1604     NAMD_bug(
"WorkDistrib::assignPatchesBitReversal called improperly");
  1608   for ( 
int i = 1; i < ncpus; ++i ) {
  1613   sortNodesAndAssign(seq.begin());
  1614   if ( ncpus > 2*npatches ) sortNodesAndAssign(seq.begin()+npatches, 1);
  1632     return ((a1 == a2) && (b1 == b2) && (c1 == c2));
  1641     return ( (a1 < a2) || ((a1 == a2) && (b1 < b2)) ||
  1642                 ((a1 == a2) && (b1 == b2) && (c1 < c2)) );
  1646 void WorkDistrib::sortNodesAndAssign(
int *assignedNode, 
int baseNodes) {
  1652   CProxy_Node nd(CkpvAccess(BOCclass_group).node);
  1653   Node *node = nd.ckLocalBranch();
  1661   for ( i=0; i < nnodes; ++i ) {
  1662     allnodes[i].node = i;
  1664   for ( pid=0; pid<npatches; ++pid ) {
  1666     allnodes[assignedNode[pid]].npatches++;
  1667     allnodes[assignedNode[pid]].a_total += patchMap->
index_a(pid);
  1668     allnodes[assignedNode[pid]].b_total += patchMap->
index_b(pid);
  1669     allnodes[assignedNode[pid]].c_total += patchMap->
index_c(pid);
  1672   usednodes.resize(0);
  1673   for ( i=0; i < nnodes; ++i ) {
  1674     if ( allnodes[i].npatches ) usednodes.add(allnodes[i]);
  1678   for ( i=0; i < nnodes; ++i ) {
  1680     if ( allnodes[pe].npatches ) allnodes[usednodes[i2++].node].node = pe;
  1683   for ( pid=0; pid<npatches; ++pid ) {
  1685     if ( ! baseNodes ) {
  1686       patchMap->
assignNode(pid, allnodes[assignedNode[pid]].node);      
  1693 void WorkDistrib::assignPatchesRoundRobin() 
  1697   CProxy_Node nd(CkpvAccess(BOCclass_group).node);
  1698   Node *node = nd.ckLocalBranch();
  1704   int *assignedNode = 
new int[patchMap->
numPatches()];
  1706   for(pid=0; pid < patchMap->
numPatches(); pid++) {
  1707     assignedNode[pid] = pid % ncpus;
  1710   sortNodesAndAssign(assignedNode);
  1711   delete [] assignedNode;
  1715 void WorkDistrib::assignPatchesRecursiveBisection() 
  1718   int *assignedNode = 
new int[patchMap->
numPatches()];
  1725   int usedNodes = numNodes;
  1726   int unusedNodes = 0;
  1727   CkPrintf(
"assignPatchesRecursiveBisection\n");
  1728   if ( 
simParams->noPatchesOnZero && numNodes > 1 ){
  1730     if(
simParams->noPatchesOnOne && numNodes > 2)
  1733   unusedNodes = numNodes - usedNodes;
  1735   if ( recBisec.partition(assignedNode) ) {
  1736     if ( unusedNodes !=0 ) {
  1737       for ( 
int i=0; i<patchMap->
numPatches(); ++i ) {
  1738         assignedNode[i] += unusedNodes;
  1741     sortNodesAndAssign(assignedNode);
  1742     delete [] assignedNode;
  1747     delete [] assignedNode; 
  1750          << 
"WorkDistrib: Recursive bisection fails, "  1751          << 
"invoking space-filling curve algorithm\n";
  1752     assignPatchesSpaceFillingCurve();
  1763     return CmiGetFirstPeOnPhysicalNode(CmiPhysicalNodeID(pe));
  1767     int na=
tmgr.getDimNA();
  1768     int nb=
tmgr.getDimNB();
  1769     int nc=
tmgr.getDimNC();
  1770     int nd=
tmgr.getDimND();
  1771     int ne=
tmgr.getDimNE();
  1773     int na=
tmgr.getDimNX();
  1774     int nb=
tmgr.getDimNY();
  1775     int nc=
tmgr.getDimNZ();
  1784     for ( 
int i=0; i<na; ++i ) { a_flags[i] = 0; }
  1785     for ( 
int i=0; i<nb; ++i ) { b_flags[i] = 0; }
  1786     for ( 
int i=0; i<nc; ++i ) { c_flags[i] = 0; }
  1787     for ( 
int i=0; i<nd; ++i ) { d_flags[i] = 0; }
  1788     for ( 
int i=0; i<ne; ++i ) { e_flags[i] = 0; }
  1789     int npes = CkNumPes();
  1790     for ( 
int pe=0; pe<npes; ++pe ) {
  1793       tmgr.rankToCoordinates(
fixpe(pe),a,b,c,d,e,t);
  1795       tmgr.rankToCoordinates(
fixpe(pe),a,b,c,t);
  1798       if ( a < 0 || a >= na ) 
NAMD_bug(
"inconsistent torus topology!");
  1799       if ( b < 0 || b >= nb ) 
NAMD_bug(
"inconsistent torus topology!");
  1800       if ( c < 0 || c >= nc ) 
NAMD_bug(
"inconsistent torus topology!");
  1801       if ( d < 0 || d >= nd ) 
NAMD_bug(
"inconsistent torus topology!");
  1802       if ( e < 0 || e >= ne ) 
NAMD_bug(
"inconsistent torus topology!");
  1809     iout << 
iINFO << 
"TORUS A SIZE " << na << 
" USING";
  1810     for ( 
int i=0; i<na; ++i ) { 
if ( a_flags[i] ) 
iout << 
" " << i; }
  1812     iout << 
iINFO << 
"TORUS B SIZE " << nb << 
" USING";
  1813     for ( 
int i=0; i<nb; ++i ) { 
if ( b_flags[i] ) 
iout << 
" " << i; }
  1815     iout << 
iINFO << 
"TORUS C SIZE " << nc << 
" USING";
  1816     for ( 
int i=0; i<nc; ++i ) { 
if ( c_flags[i] ) 
iout << 
" " << i; }
  1819     iout << 
iINFO << 
"TORUS D SIZE " << nd << 
" USING";
  1820     for ( 
int i=0; i<nd; ++i ) { 
if ( d_flags[i] ) 
iout << 
" " << i; }
  1822     iout << 
iINFO << 
"TORUS E SIZE " << ne << 
" USING";
  1823     for ( 
int i=0; i<ne; ++i ) { 
if ( e_flags[i] ) 
iout << 
" " << i; }
  1830     if ( 
tmgr.absA(na) == 0 ) 
  1832     if ( 
tmgr.absX(na) == 0 ) 
  1834       for ( 
int i=0, gaplen=0, gapstart=0; i<2*na; ++i ) {
  1835         if ( a_flags[i%na] ) gapstart = i+1;
  1836         else if ( i - gapstart >= gaplen ) {
  1837           a_rot = 2*na-i-1; gaplen = i - gapstart;
  1841     if ( 
tmgr.absB(nb) == 0 ) 
  1843     if ( 
tmgr.absY(nb) == 0 ) 
  1845       for ( 
int i=0, gaplen=0, gapstart=0; i<2*nb; ++i ) {
  1846         if ( b_flags[i%nb] ) gapstart = i+1;
  1847         else if ( i - gapstart >= gaplen ) {
  1848           b_rot = 2*nb-i-1; gaplen = i - gapstart;
  1852     if ( 
tmgr.absC(nc) == 0 ) 
  1854     if ( 
tmgr.absZ(nc) == 0 ) 
  1856       for ( 
int i=0, gaplen=0, gapstart=0; i<2*nc; ++i ) {
  1857         if ( c_flags[i%nc] ) gapstart = i+1;
  1858         else if ( i - gapstart >= gaplen ) {
  1859           c_rot = 2*nc-i-1; gaplen = i - gapstart;
  1863     if ( 
tmgr.absD(nd) == 0 ) 
  1864       for ( 
int i=0, gaplen=0, gapstart=0; i<2*nd; ++i ) {
  1865         if ( d_flags[i%nd] ) gapstart = i+1;
  1866         else if ( i - gapstart >= gaplen ) {
  1867           d_rot = 2*nd-i-1; gaplen = i - gapstart;
  1870     if ( 
tmgr.absE(ne) == 0 ) 
  1871       for ( 
int i=0, gaplen=0, gapstart=0; i<2*ne; ++i ) {
  1872         if ( e_flags[i%ne] ) gapstart = i+1;
  1873         else if ( i - gapstart >= gaplen ) {
  1874           e_rot = 2*ne-i-1; gaplen = i - gapstart;
  1879     int a_min=na, a_max=-1;
  1880     int b_min=nb, b_max=-1;
  1881     int c_min=nc, c_max=-1;
  1882     int d_min=nd, d_max=-1;
  1883     int e_min=ne, e_max=-1;
  1884     for ( 
int pe=0; pe<npes; ++pe ) {
  1887       tmgr.rankToCoordinates(
fixpe(pe),a,b,c,d,e,t);
  1889       tmgr.rankToCoordinates(
fixpe(pe),a,b,c,t);
  1897       if ( a < a_min ) a_min = a;
  1898       if ( b < b_min ) b_min = b;
  1899       if ( c < c_min ) c_min = c;
  1900       if ( d < d_min ) d_min = d;
  1901       if ( e < e_min ) e_min = e;
  1902       if ( a > a_max ) a_max = a;
  1903       if ( b > b_max ) b_max = b;
  1904       if ( c > c_max ) c_max = c;
  1905       if ( d > d_max ) d_max = d;
  1906       if ( e > e_max ) e_max = e;
  1908     int a_len = a_max - a_min + 1;
  1909     int b_len = b_max - b_min + 1;
  1910     int c_len = c_max - c_min + 1;
  1911     int d_len = d_max - d_min + 1;
  1912     int e_len = e_max - e_min + 1;
  1914     lensort[0] = (a_len << 3) + 0;
  1915     lensort[1] = (b_len << 3) + 1;
  1916     lensort[2] = (c_len << 3) + 2;
  1917     lensort[3] = (d_len << 3) + 3;
  1918     lensort[4] = (e_len << 3) + 4;
  1920     std::sort(lensort, lensort+5);
  1922     for ( 
int i=0; i<5; ++i ) { 
if ( (lensort[i] & 7) == 0 ) 
a_dim = 4-i; }
  1923     for ( 
int i=0; i<5; ++i ) { 
if ( (lensort[i] & 7) == 1 ) 
b_dim = 4-i; }
  1924     for ( 
int i=0; i<5; ++i ) { 
if ( (lensort[i] & 7) == 2 ) 
c_dim = 4-i; }
  1925     for ( 
int i=0; i<5; ++i ) { 
if ( (lensort[i] & 7) == 3 ) 
d_dim = 4-i; }
  1926     for ( 
int i=0; i<5; ++i ) { 
if ( (lensort[i] & 7) == 4 ) 
e_dim = 4-i; }
  1928     if ( a_len >= b_len && a_len >= c_len ) {
  1930       if ( b_len >= c_len ) {
  1935     } 
else if ( b_len >= a_len && b_len >= c_len ) {
  1937       if ( a_len >= c_len ) {
  1944       if ( a_len >= b_len ) {
  1951     iout << 
iINFO << 
"TORUS MINIMAL MESH SIZE IS " << a_len << 
" BY " << b_len << 
" BY " << c_len
  1953     << 
" BY " << d_len << 
" BY " << e_len
  1961     tmgr.rankToCoordinates(
fixpe(pe),a,b,c,d,e,t);
  1963     tmgr.rankToCoordinates(
fixpe(pe),a,b,c,t);
  1982       int crds1[3], crds2[3];
  1985       for ( 
int i=0; i<3; ++i ) {
  1987         if ( crds1[d] != crds2[d] ) 
return ( crds1[d] < crds2[d] );
  1990       return ( index[pe1] < index[pe2] );
  1994     if ( node_begin == node_end ) 
return node_begin;
  1995     int tmins[3], tmaxs[3], tlens[3], sortdims[3];
  1996     coords(*node_begin, tmins);
  1997     coords(*node_begin, tmaxs);
  1998     for ( 
int *peitr = node_begin; peitr != node_end; ++peitr ) {
  2001       for ( 
int i=0; i<3; ++i ) {
  2002         if ( tvals[i] < tmins[i] ) tmins[i] = tvals[i];
  2003         if ( tvals[i] > tmaxs[i] ) tmaxs[i] = tvals[i];
  2006     for ( 
int i=0; i<3; ++i ) {
  2007       tlens[i] = tmaxs[i] - tmins[i];
  2009     sortdims[0] = splitdim;
  2010     for ( 
int i=0, j=0; i<3; ++i ) {
  2011       if ( i != splitdim ) sortdims[++j] = i;
  2013     if ( tlens[sortdims[1]] < tlens[sortdims[2]] ) {
  2014       int tmp = sortdims[1];
  2015       sortdims[1] = sortdims[2];
  2019     int *nodes = node_begin;
  2020     int nnodes = node_end - node_begin;
  2023     int c_split = 
coord(nodes[0],splitdim);
  2024     for ( 
int i=0; i<nnodes; ++i ) {
  2025       if ( 
coord(nodes[i],splitdim) != c_split ) {
  2026         int mid = (nnodes+1)/2;
  2027         if ( abs(i-mid) < abs(i_split-mid) ) {
  2029           c_split = 
coord(i,splitdim);
  2035     for ( 
int i=0; i<nnodes; ++i ) {
  2036       if ( ! CmiPeOnSamePhysicalNode(nodes[i_split],nodes[i]) ) {
  2037         int mid = (nnodes+1)/2;
  2038         if ( abs(i-mid) < abs(i_split-mid) ) i_split = i;
  2042     return ( node_begin + i_split );
  2052     if ( a1 < a2 ) 
return true;
  2053     if ( a1 > a2 ) 
return false;
  2054     int dir = ( (a1 & 1) ? -1 : 1 );
  2057     if ( b1 * dir < b2 * dir ) 
return true;
  2058     if ( b1 * dir > b2 * dir ) 
return false;
  2059     dir *= ( (b1 & 1) ? -1 : 1 );
  2062     if ( c1 * dir < c2 * dir ) 
return true;
  2073     if ( a1 < a2 ) 
return true;
  2074     if ( a1 > a2 ) 
return false;
  2075     int dir = ( (a1 & 1) ? -1 : 1 );
  2078     if ( b1 * dir < b2 * dir ) 
return true;
  2079     if ( b1 * dir > b2 * dir ) 
return false;
  2080     dir *= ( (b1 & 1) ? -1 : 1 );
  2083     if ( c1 * dir < c2 * dir ) 
return true;
  2094     if ( a1 < a2 ) 
return true;
  2095     if ( a1 > a2 ) 
return false;
  2096     int dir = ( (a1 & 1) ? -1 : 1 );
  2099     if ( b1 * dir < b2 * dir ) 
return true;
  2100     if ( b1 * dir > b2 * dir ) 
return false;
  2101     dir *= ( (b1 & 1) ? -1 : 1 );
  2104     if ( c1 * dir < c2 * dir ) 
return true;
  2110   int *patch_begin, 
int *patch_end,
  2111   int *node_begin, 
int *node_end,
  2113   double *sortedLoads,
  2120   int *patches = patch_begin;
  2121   int npatches = patch_end - patch_begin;
  2122   int *nodes = node_begin;
  2123   int nnodes = node_end - node_begin;
  2126   const int emptyPatchLoad = 
simParams->emptyPatchLoad;
  2127   double totalRawLoad = 0;
  2128   for ( 
int i=0; i<npatches; ++i ) {
  2130 #ifdef MEM_OPT_VERSION  2131     double load = patchMap->numAtoms(pid) + emptyPatchLoad;
  2135     patchLoads[pid] = load;
  2136     sortedLoads[i] = load;
  2137     totalRawLoad += load;
  2139   std::sort(sortedLoads,sortedLoads+npatches);
  2143   double maxPatchLoad = 1;
  2144   for ( 
int i=0; i<npatches; ++i ) {
  2145     double load = sortedLoads[i];
  2146     double total = sumLoad + (npatches-i) * load;
  2147     if ( nnodes * load > total ) 
break;
  2149     maxPatchLoad = load;
  2151   double totalLoad = 0;
  2152   for ( 
int i=0; i<npatches; ++i ) {
  2154     if ( patchLoads[pid] > maxPatchLoad ) patchLoads[pid] = maxPatchLoad;
  2155     totalLoad += patchLoads[pid];
  2157   if ( nnodes * maxPatchLoad > totalLoad )
  2158     NAMD_bug(
"algorithm failure in WorkDistrib recursive_bisect_with_curve()");
  2160   int a_len, b_len, c_len;
  2161   int a_min, b_min, c_min;
  2163     a_min = patchMap->
index_a(patches[0]);
  2164     b_min = patchMap->
index_b(patches[0]);
  2165     c_min = patchMap->
index_c(patches[0]);
  2169     for ( 
int i=1; i<npatches; ++i ) {
  2170       int a = patchMap->
index_a(patches[i]);
  2171       int b = patchMap->
index_b(patches[i]);
  2172       int c = patchMap->
index_c(patches[i]);
  2173       if ( a < a_min ) a_min = a;
  2174       if ( b < b_min ) b_min = b;
  2175       if ( c < c_min ) c_min = c;
  2176       if ( a > a_max ) a_max = a;
  2177       if ( b > b_max ) b_max = b;
  2178       if ( c > c_max ) c_max = c;
  2180     a_len = a_max - a_min;
  2181     b_len = b_max - b_min;
  2182     c_len = c_max - c_min;
  2185   int *node_split = node_begin;
  2187   if ( 
simParams->disableTopology ) ; 
else  2188   if ( a_len >= b_len && a_len >= c_len ) {
  2190   } 
else if ( b_len >= a_len && b_len >= c_len ) {
  2192   } 
else if ( c_len >= a_len && c_len >= b_len ) {
  2196   if ( node_split == node_begin ) {  
  2201     for ( 
int i=0; i<nnodes; ++i ) {
  2202       if ( ! CmiPeOnSamePhysicalNode(nodes[i_split],nodes[i]) ) {
  2203         int mid = (nnodes+1)/2;
  2204         if ( abs(i-mid) < abs(i_split-mid) ) i_split = i;
  2208     node_split = node_begin + i_split;
  2211   bool final_patch_sort = 
false;
  2213   if ( node_split == node_begin ) {  
  2215         nnodes == CmiNumPesOnPhysicalNode(CmiPhysicalNodeID(*node_begin)) ) {
  2217       tmgr.
coords(*node_begin, crds);
  2218       CkPrintf(
"WorkDistrib: physnode %5d pe %5d node %5d at %5d %5d %5d from %5d %5d %5d has %5d patches %5d x %5d x %5d load %7f pes %5d\n",
  2219                CmiPhysicalNodeID(*node_begin), *node_begin,
  2220                CkNodeOf(*node_begin), crds[0], crds[1], crds[2],
  2221                a_min, b_min, c_min, npatches,
  2222                a_len+1, b_len+1, c_len+1, totalRawLoad, nnodes);
  2226     final_patch_sort = 
true;
  2230     for ( 
int i=0; i<nnodes; ++i ) {
  2231       if ( CmiNodeOf(nodes[i_split]) != CmiNodeOf(nodes[i]) ) {
  2232         int mid = (nnodes+1)/2;
  2233         if ( abs(i-mid) < abs(i_split-mid) ) i_split = i;
  2237     node_split = node_begin + i_split;
  2240   if ( node_split == node_begin ) {  
  2242         nnodes == CmiNodeSize(CmiNodeOf(*node_begin)) ) {
  2244       tmgr.
coords(*node_begin, crds);
  2245       CkPrintf(
"WorkDistrib: node %5d pe %5d has %5d patches %5d x %5d x %5d load %7f pes %5d\n",
  2246                CmiNodeOf(*node_begin), *node_begin, npatches,
  2247                a_len+1, b_len+1, c_len+1, totalRawLoad, nnodes);
  2251     node_split = node_begin + nnodes/2;
  2254   if ( nnodes == 1 ) {  
  2256     int *node = node_begin;
  2258     for ( 
int i=0; i < npatches; ++i ) {
  2259       int pid = patches[i];
  2260       assignedNode[pid] = *node;
  2261       sumLoad += patchLoads[pid];
  2262       if ( 0 ) CkPrintf(
"assign %5d node %5d patch %5d %5d %5d load %7f total %7f\n",
  2267                 patchLoads[pid], sumLoad);
  2273   if ( final_patch_sort ) {
  2276   } 
else if ( a_len >= b_len && a_len >= c_len ) {
  2277     if ( 0 ) CkPrintf(
"sort a\n");
  2279   } 
else if ( b_len >= a_len && b_len >= c_len ) {
  2280     if ( 0 ) CkPrintf(
"sort b\n");
  2282   } 
else if ( c_len >= a_len && c_len >= b_len ) {
  2283     if ( 0 ) CkPrintf(
"sort c\n");
  2289     int *node = node_begin;
  2291     for ( patch_split = patch_begin;
  2292           patch_split != patch_end && node != node_split;
  2294       sumLoad += patchLoads[*patch_split];
  2295       double targetLoad = totalLoad *
  2296         ((double)(node-node_begin+1) / (double)nnodes);
  2297       if ( 0 ) CkPrintf(
"test %5ld node %5d patch %5d %5d %5d load %7f target %7f\n",
  2298                 patch_split - patch_begin, *node,
  2299                 patchMap->
index_a(*patch_split),
  2300                 patchMap->
index_b(*patch_split),
  2301                 patchMap->
index_c(*patch_split),
  2302                 sumLoad, targetLoad);
  2303       double extra = ( patch_split+1 != patch_end ? 0.5 * patchLoads[*(patch_split+1)] : 0 );
  2304       if ( node+1 < node_end && sumLoad + extra >= targetLoad ) { ++node; }
  2306     double targetLoad = totalLoad *
  2307       ((double)(node_split-node_begin) / (double)nnodes);
  2308     if ( 0 ) CkPrintf(
"split node %5ld/%5d patch %5ld/%5d load %7f target %7f\n",
  2309               node_split-node_begin, nnodes,
  2310               patch_split-patch_begin, npatches,
  2311               sumLoad, targetLoad);
  2316     patch_begin, patch_split, node_begin, node_split,
  2317     patchLoads, sortedLoads, assignedNode, tmgr);
  2319     patch_split, patch_end, node_split, node_end,
  2320     patchLoads, sortedLoads, assignedNode, tmgr);
  2324 void WorkDistrib::assignPatchesSpaceFillingCurve() 
  2328   const int numPatches = patchMap->
numPatches();
  2329   int *assignedNode = 
new int[numPatches];
  2335           NAMD_die(
"simulateInitialMapping not supported by assignPatchesSpaceFillingCurve()");
  2340   for ( 
int i=0; i<numPatches; ++i ) {
  2341     patchOrdering[i] = i;
  2345   nodeOrdering.resize(0);
  2346   for ( 
int i=0; i<numNodes; ++i ) {
  2348     if ( 
simParams->noPatchesOnZero && numNodes > 1 ) {
  2349       if ( pe == 0 ) 
continue;
  2350       if(
simParams->noPatchesOnOne && numNodes > 2) {
  2351         if ( pe == 1 ) 
continue;
  2354 #ifdef MEM_OPT_VERSION  2359     nodeOrdering.add(pe);
  2360     if ( 0 ) CkPrintf(
"using pe %5d\n", pe);
  2363   int *node_begin = nodeOrdering.begin();
  2364   int *node_end = nodeOrdering.end();
  2365   if ( nodeOrdering.size() > numPatches ) {
  2366     node_end = node_begin + numPatches;
  2368   std::sort(node_begin, node_end, pe_sortop_compact());
  2370   int *basenode_begin = node_begin;
  2371   int *basenode_end = node_end;
  2372   if ( nodeOrdering.size() > 2*numPatches ) {
  2373     basenode_begin = node_end;
  2374     basenode_end = basenode_begin + numPatches;
  2375     std::sort(basenode_begin, basenode_end, pe_sortop_compact());
  2379     iout << 
iWARN << 
"IGNORING TORUS TOPOLOGY DURING PATCH PLACEMENT\n" << 
endi;
  2383     patchOrdering.begin(), patchOrdering.end(),
  2384     node_begin, node_end,
  2385     patchLoads.begin(), sortedLoads.begin(), assignedNode, tmgr);
  2387   std::sort(node_begin, node_end, pe_sortop_compact());
  2389   int samenodecount = 0;
  2391   for ( 
int pid=0; pid<numPatches; ++pid ) {
  2392     int node = assignedNode[pid];
  2394     int nodeidx = std::lower_bound(node_begin, node_end, node,
  2395                                    pe_sortop_compact()) - node_begin;
  2396     int basenode = basenode_begin[nodeidx];
  2398     if ( CmiPeOnSamePhysicalNode(node,basenode) ) ++samenodecount;
  2401   iout << 
iINFO << 
"Placed " << (samenodecount*100./numPatches) << 
"% of base nodes on same physical node as patch\n" << 
endi;
  2403   delete [] assignedNode; 
  2411   CProxy_Node nd(CkpvAccess(BOCclass_group).node);
  2412   Node *node = nd.ckLocalBranch();
  2414   DebugM(3,
"Mapping computes\n");
  2423     mapComputeHomePatches(computeDPMTAType);
  2425     NAMD_die(
"This binary does not include DPMTA (FMA).");
  2430       mapComputeHomePatches(computeDPMEType);
  2437 #if defined(NAMD_CUDA) || defined(NAMD_HIP)  2451     DebugM(2,
"adding ComputeGlobal\n");
  2469 #ifdef CHARM_HAS_MSA  2480 #if defined(NAMD_CUDA) || defined(NAMD_HIP)  2483     mapComputeNode(computeBondedCUDAType);
  2488 #if defined(NAMD_CUDA) || defined(NAMD_HIP)  2498   mapComputeNonbonded();
  2566     CkPrintf(
"ComputeMap has been loaded from %s.\n", 
simParams->computeMapFilename);
  2571 void WorkDistrib::mapComputeHomeTuples(
ComputeType type)
  2575   CProxy_Node nd(CkpvAccess(BOCclass_group).node);
  2576   Node *node = nd.ckLocalBranch();
  2584   char *isBaseNode = 
new char[numNodes];
  2585   memset(isBaseNode,0,numNodes*
sizeof(
char));
  2588   for(
int j=0; j<numPatches; j++) {
  2589     isBaseNode[patchMap->
basenode(j)] = 1;
  2592   for(
int i=0; i<numNodes; i++) {
  2593     if ( isBaseNode[i] ) {
  2598   delete [] isBaseNode;
  2602 void WorkDistrib::mapComputeHomePatches(
ComputeType type)
  2606   CProxy_Node nd(CkpvAccess(BOCclass_group).node);
  2607   Node *node = nd.ckLocalBranch();
  2615   for(
int i=0; i<numNodes; i++) {
  2623 void WorkDistrib::mapComputePatch(
ComputeType type)
  2634     computeMap->
newPid(cid,i);
  2641 void WorkDistrib::mapComputeNode(
ComputeType type)
  2649   int ncpus = CkNumPes();
  2655   for(
int i=0; i<ncpus; i++) {
  2662 void WorkDistrib::mapComputeNonbonded(
void)
  2670   CProxy_Node nd(CkpvAccess(BOCclass_group).node);
  2671   Node *node = nd.ckLocalBranch();
  2673   int ncpus = CkNumPes();
  2674   int nodesize = CkMyNodeSize();
  2677           nodesize = 
simParams->simulatedNodeSize;
  2688   double partScaling = 1.0;
  2689   if ( ncpus < patchMap->numPatches() ) {
  2690     partScaling = ((double)ncpus) / ((double)patchMap->
numPatches());
  2696    int numPartitions = 1;
  2699 #ifdef  MEM_OPT_VERSION      2700     int64 numFixed = patchMap->numFixedAtoms(i);
  2701     int64 numAtoms = patchMap->numAtoms(i);
  2709       numPartitions = (int) ( partScaling * ( 0.5 +
  2710         (numAtoms*numAtoms-numFixed*numFixed) / (
double)(2*divide*divide) ) );
  2712     if (numPartitions < 1) numPartitions = 1;
  2716     DebugM(4,
"Mapping " << numPartitions << 
" ComputeNonbondedSelf objects for patch " << i << 
"\n");
  2731       computeMap->
newPid(cid,i);
  2736   for(
int p1=0; p1 <patchMap->
numPatches(); p1++) 
  2740     for(j=0;j<numNeighbors;j++)
  2742         int p2 = oneAway[j];
  2743         int dsp = oneAwayDownstream[j];
  2745       int numPartitions = 1;
  2748 #ifdef  MEM_OPT_VERSION          2749         int64 numAtoms1 = patchMap->numAtoms(p1);
  2750         int64 numAtoms2 = patchMap->numAtoms(p2);
  2751         int64 numFixed1 = patchMap->numFixedAtoms(p1);
  2752         int64 numFixed2 = patchMap->numFixedAtoms(p2);
  2761         const int t2 = oneAwayTrans[j];
  2768         const int ia1 = patchMap->
index_a(p1);
  2770         const int ib1 = patchMap->
index_b(p1);
  2772         const int ic1 = patchMap->
index_c(p1);
  2775         if ( abs(ia2-ia1) > nax ||
  2776              abs(ib2-ib1) > nay ||
  2777              abs(ic2-ic1) > naz )
  2778           NAMD_bug(
"Bad patch distance in WorkDistrib::mapComputeNonbonded");
  2781         if ( ia1 == ia2 ) --distance;
  2782         else if ( ia1 == ia2 + nax - 1 ) --distance;
  2783         else if ( ia1 + nax - 1 == ia2 ) --distance;
  2784         if ( ib1 == ib2 ) --distance;
  2785         else if ( ib1 == ib2 + nay - 1 ) --distance;
  2786         else if ( ib1 + nay - 1 == ib2 ) --distance;
  2787         if ( ic1 == ic2 ) --distance;
  2788         else if ( ic1 == ic2 + naz - 1 ) --distance;
  2789         else if ( ic1 + naz - 1 == ic2 ) --distance;
  2791         if ( distance == 0 ) {
  2793         } 
else if (distance == 1) {
  2799           numPartitions = (int) ( partScaling * ( 0.5 +
  2800             (numAtoms1*numAtoms2-numFixed1*numFixed2)/(
double)(divide*divide) ) );
  2802         if ( numPartitions < 1 ) numPartitions = 1;
  2812                   computeMap->
newPid(cid,p1);
  2813                   computeMap->
newPid(cid,p2,oneAwayTrans[j]);
  2814                   patchMap->
newCid(p1,cid);
  2815                   patchMap->
newCid(p2,cid);
  2822 void WorkDistrib::mapComputeLCPO(
void) {
  2827   CProxy_Node nd(CkpvAccess(BOCclass_group).node);
  2828   Node *node = nd.ckLocalBranch();
  2830   int ncpus = CkNumPes();
  2831   int nodesize = CkMyNodeSize();
  2832   const int maxPatches = 8;
  2834   int numPatchesInOctet;
  2835   PatchID patchesInOctet[maxPatches];
  2836   int oneAwayTrans[maxPatches];
  2839   int numPartitions = 1;
  2855       for (
int p = 0; p < numPatchesInOctet; p++) {
  2856         computeMap->
newPid(cid, patchesInOctet[p], oneAwayTrans[p]);
  2858       for (
int p = 0; p < numPatchesInOctet; p++) {
  2859         patchMap->
newCid(patchesInOctet[p],cid);
  2872     NAMD_bug(
"compute->sequence() < 0 in WorkDistrib::messageEnqueueWork");
  2878   int type = compute->
type();
  2879   int cid = compute->
cid;
  2881   CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);
  2885     wdProxy[CkMyPe()].enqueueExcls(msg);
  2889     wdProxy[CkMyPe()].enqueueBonds(msg);
  2893     wdProxy[CkMyPe()].enqueueAngles(msg);
  2897     wdProxy[CkMyPe()].enqueueDihedrals(msg);
  2901     wdProxy[CkMyPe()].enqueueImpropers(msg);
  2905     wdProxy[CkMyPe()].enqueueThole(msg);
  2909     wdProxy[CkMyPe()].enqueueAniso(msg);
  2913     wdProxy[CkMyPe()].enqueueCrossterms(msg);
  2917     wdProxy[CkMyPe()].enqueueOneFourNbThole(msg);
  2922     wdProxy[CkMyPe()].enqueueGromacsPair(msg);
  2926     wdProxy[CkMyPe()].enqueueLCPO(msg);
  2929     switch ( seq % 2 ) {
  2932       switch ( gbisPhase ) {
  2934            wdProxy[CkMyPe()].enqueueSelfA1(msg);
  2937            wdProxy[CkMyPe()].enqueueSelfA2(msg);
  2940            wdProxy[CkMyPe()].enqueueSelfA3(msg);
  2946       switch ( gbisPhase ) {
  2948            wdProxy[CkMyPe()].enqueueSelfB1(msg);
  2951            wdProxy[CkMyPe()].enqueueSelfB2(msg);
  2954            wdProxy[CkMyPe()].enqueueSelfB3(msg);
  2959       NAMD_bug(
"WorkDistrib::messageEnqueueSelf case statement error!");
  2963     switch ( seq % 2 ) {
  2966       switch ( gbisPhase ) {
  2968            wdProxy[CkMyPe()].enqueueWorkA1(msg);
  2971            wdProxy[CkMyPe()].enqueueWorkA2(msg);
  2974            wdProxy[CkMyPe()].enqueueWorkA3(msg);
  2980       switch ( gbisPhase ) {
  2982            wdProxy[CkMyPe()].enqueueWorkB1(msg);
  2985            wdProxy[CkMyPe()].enqueueWorkB2(msg);
  2988            wdProxy[CkMyPe()].enqueueWorkB3(msg);
  2993       wdProxy[CkMyPe()].enqueueWorkC(msg);
  2996       NAMD_bug(
"WorkDistrib::messageEnqueueWork case statement error!");
  2999 #if defined(NAMD_CUDA) || defined(NAMD_HIP)  3003     switch ( gbisPhase ) {
  3005          wdProxy[CkMyPe()].enqueueCUDA(msg);
  3008          wdProxy[CkMyPe()].enqueueCUDAP2(msg);
  3011          wdProxy[CkMyPe()].enqueueCUDAP3(msg);
  3020     wdProxy[CkMyPe()].enqueueMIC(msg);
  3025     wdProxy[CkMyPe()].enqueuePme(msg);
  3027 #if defined(NAMD_CUDA) || defined(NAMD_HIP)  3029     wdProxy[CkMyPe()].enqueuePme(msg);
  3033     wdProxy[CkMyPe()].enqueueWork(msg);
  3044     NAMD_bug(
"compute->sequence() < 0 in WorkDistrib::messageEnqueueWork");
  3050   CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);
  3052 #if defined(NAMD_CUDA) || defined(NAMD_HIP)  3054     switch ( gbisPhase ) {
  3056          wdProxy[CkMyPe()].finishCUDA(msg);
  3059          wdProxy[CkMyPe()].finishCUDAP2(msg);
  3062          wdProxy[CkMyPe()].finishCUDAP3(msg);
  3077     NAMD_bug(
"compute->sequence() < 0 in WorkDistrib::messageFinishMIC");
  3083   CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);
  3086     wdProxy[CkMyPe()].finishMIC(msg);
  3095     NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
  3101     NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
  3107     NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
  3113     NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
  3119     NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
  3125     NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
  3131     NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
  3137     NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
  3143     NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
  3149     NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
  3157     NAMD_bug(
"\nWorkDistrib LocalWorkMsg recycling failed! Check enqueueGromacsPair from WorkDistrib.C\n");
  3164     NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
  3170     NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
  3175     NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
  3180     NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
  3185     NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
  3191     NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
  3196     NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
  3201     NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
  3207     NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
  3212     NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
  3217     NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
  3223     NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
  3228     NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
  3233     NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
  3241     NAMD_bug(
"WorkDistrib LocalWorkMsg recycling failed!");
  3290 void WorkDistrib::velocities_from_PDB(
const char *filename, 
  3291                                       Vector *v, 
int totalAtoms)
  3297   v_pdb = 
new PDB(filename);
  3298   if ( v_pdb == NULL )
  3300     NAMD_die(
"memory allocation failed in Node::velocities_from_PDB");
  3309     sprintf(err_msg, 
"FOUND %d COORDINATES IN VELOCITY PDB!!",
  3319   for (i=0; i<totalAtoms; i++)
  3344 void WorkDistrib::velocities_from_binfile(
const char *fname, 
Vector *vels, 
int n)
  3365                                     Vector *v, 
int totalAtoms)
  3375   int lesReduceTemp = lesOn && 
simParams->lesReduceTemp;
  3382   for (i=0; i<totalAtoms; i++)
  3384     if (structure->
atommass(i) <= 0.) {
  3387       kbToverM = sqrt(kbT *
  3388         ( lesOn && structure->
get_fep_type(i) ? tempFactor : 1.0 ) /
  3401     for (randnum=0.0, j=0; j<12; j++)
  3403       randnum += vel_random.uniform();
  3408     v[i].
x = randnum*kbToverM;
  3410     for (randnum=0.0, j=0; j<12; j++)
  3412       randnum += vel_random.uniform();
  3417     v[i].
y = randnum*kbToverM;
  3419     for (randnum=0.0, j=0; j<12; j++)
  3421       randnum += vel_random.uniform();
  3426     v[i].
z = randnum*kbToverM;
  3429   if ( 
simParams->drudeOn ) 
for (i=0; i<totalAtoms; i++) {
  3448 void WorkDistrib::remove_com_motion(
Vector *vel, 
Molecule *structure, 
int n)
  3458     mv += mass * vel[i];
  3464   iout << 
iINFO << 
"REMOVING COM VELOCITY "  3467   for (i=0; i<n; i++) { vel[i] -= mv; }
  3476 int WorkDistrib::assignPatchesTopoGridRecBisection() {
  3479   int *assignedNode = 
new int[patchMap->
numPatches()];
  3486   int usedNodes = numNodes;
  3487   CkPrintf(
"assignPatchesTopoGridRecBisection\n");
  3488   if ( 
simParams->noPatchesOnZero && numNodes > 1 ) {
  3490     if ( 
simParams->noPatchesOnOne && numNodes > 2 )
  3495   int xsize = 0, ysize = 0, zsize = 0;
  3499   xsize = tmgr.getDimNX();
  3500   ysize = tmgr.getDimNY();
  3501   zsize = tmgr.getDimNZ();
  3504   int rc = recBisec.partitionProcGrid(xsize, ysize, zsize, assignedNode);
  3506   delete [] assignedNode;
  3513 #if defined(NAMD_MIC)  3514   extern void mic_hostDeviceLDB();
  3515   extern void mic_contributeHostDeviceLDB(
int idLen, 
int * 
id);
  3516   extern void mic_setDeviceLDBParams(
int dt, 
int hs, 
int sp1, 
int pp1, 
int pp2);
  3520   #if defined(NAMD_MIC)  3521     CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);
  3522     wdProxy.initHostDeviceLDB();
  3527   #if defined(NAMD_MIC)  3528     mic_hostDeviceLDB();
  3533   #if defined(NAMD_MIC)  3534     CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);
  3535     wdProxy[0].contributeHostDeviceLDB(peSetLen, peSet);
  3540   #if defined(NAMD_MIC)  3541     mic_contributeHostDeviceLDB(peSetLen, peSet);
  3546   #if defined(NAMD_MIC)  3547     CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);
  3548     wdProxy.setDeviceLDBParams(dt, hs, sp1, pp1, pp2);
  3553   #if defined(NAMD_MIC)  3554     mic_setDeviceLDBParams(dt, hs, sp1, pp1, pp2);
  3559 #include "WorkDistrib.def.h" 
Real atomcharge(int anum) const
 
bool operator()(int p1, int p2) const
 
void setDeviceLDBParams(int dt, int hs, int sp1, int pp1, int pp2)
 
void enqueueMIC(LocalWorkMsg *msg)
 
void enqueueOneFourNbThole(LocalWorkMsg *msg)
 
std::ostream & iINFO(std::ostream &s)
 
static void sortPmePes(int *pmepes, int xdim, int ydim)
 
int get_mother_atom(int) const
 
ScaledPosition center(int pid) const
 
Bool simulateInitialMapping
 
static void messageFinishMIC(Compute *)
 
bool operator()(int a, int b) const
 
int isSendSpanningTreeUnset()
 
patch_sortop_curve_b(PatchMap *m)
 
void enqueueAngles(LocalWorkMsg *msg)
 
static void messageFinishCUDA(Compute *)
 
int getNumFixedAtoms() const
 
PatchID assignToPatch(Position p, const Lattice &l)
 
void setNewNumPartitions(ComputeID cid, char numPartitions)
 
static bool less_than_bit_reversed(int a, int b)
 
static void recursive_bisect_with_curve(int *patch_begin, int *patch_end, int *node_begin, int *node_end, double *patchLoads, double *sortedLoads, int *assignedNode, TopoManagerWrapper &tmgr)
 
void saveComputeMap(const char *fname)
 
static ProxyMgr * Object()
 
NAMD_HOST_DEVICE int c_p() const
 
static int * peCompactOrdering
 
BigReal max_a(int pid) const
 
void finishCUDAP3(LocalWorkMsg *msg)
 
void enqueueCrossterms(LocalWorkMsg *msg)
 
bool operator()(int p1, int p2) const
 
static void partition(int *order, const FullAtom *atoms, int begin, int end)
 
int isRecvSpanningTreeUnset()
 
void enqueuePme(LocalWorkMsg *msg)
 
static PatchMap * Object()
 
void enqueueWorkA3(LocalWorkMsg *msg)
 
void enqueueWork(LocalWorkMsg *msg)
 
void enqueueGromacsPair(LocalWorkMsg *msg)
 
void enqueueSelfA1(LocalWorkMsg *msg)
 
void finishCUDAP2(LocalWorkMsg *msg)
 
static void send_contributeHostDeviceLDB(int peSetLen, int *peSet)
 
SimParameters * simParameters
 
void loadComputeMap(const char *fname)
 
Bool CUDASOAintegrateMode
 
void createHomePatch(PatchID pid, FullAtomList &a)
 
void sendAtoms(PatchID pid, FullAtomList &a)
 
void enqueueExcls(LocalWorkMsg *msg)
 
void enqueueBonds(LocalWorkMsg *msg)
 
std::ostream & endi(std::ostream &s)
 
void enqueueAniso(LocalWorkMsg *msg)
 
void enqueueSelfB1(LocalWorkMsg *msg)
 
void enqueueWorkB1(LocalWorkMsg *msg)
 
static void messageEnqueueWork(Compute *)
 
static void peOrderingReady()
 
std::ostream & iWARN(std::ostream &s)
 
int operator==(const nodesort &o) const
 
MIStream * get(char &data)
 
int index_a(int pid) const
 
int sizeGrid(ScaledPosition xmin, ScaledPosition xmax, const Lattice &lattice, BigReal patchSize, double maxNumPatches, int staticAtomAssignment, int asplit, int bsplit, int csplit)
 
ComputeID storeCompute(int node, int maxPids, ComputeType type, int partition=-1, int numPartitions=0)
 
Patch * patch(PatchID pid)
 
void enqueueSelfA3(LocalWorkMsg *msg)
 
int add(const Elem &elem)
 
bool operator()(int pe1, int pe2) const
 
Molecule stores the structural information for the system. 
 
NAMD_HOST_DEVICE int b_p() const
 
void movePatch(PatchID, NodeID)
 
LocalWorkMsg *const localWorkMsg
 
void recvComputeMapChanges(ComputeMapChangeMsg *)
 
int gridsize_c(void) const
 
char newNumPartitions(ComputeID cid)
 
void reorder(Elem *a, int n)
 
HydrogenGroup hydrogenGroup
 
void enqueueCUDA(LocalWorkMsg *msg)
 
void sendComputeMap(void)
 
void enqueueWorkB2(LocalWorkMsg *msg)
 
void enqueueCUDAP2(LocalWorkMsg *msg)
 
void assignBaseNode(PatchID, NodeID)
 
static void recursive_bisect_coord(int x_begin, int x_end, int y_begin, int y_end, int *pe_begin, ScaledPosition *coord, int *result, int ydim)
 
void newCid(int pid, int cid)
 
constexpr int getWaterModelGroupSize(const WaterModel &watmodel)
 
void enqueueSelfB3(LocalWorkMsg *msg)
 
int coord(int pe, int dim)
 
int gridsize_a(void) const
 
TopoManagerWrapper & tmgr
 
int numPatches(void) const
 
static NAMD_HOST_DEVICE int offset_b(int i)
 
void enqueueWorkC(LocalWorkMsg *msg)
 
pe_sortop_bit_reversed(int *r)
 
void reinitAtoms(const char *basename=0)
 
int operator<(const nodesort &o) const
 
void enqueueThole(LocalWorkMsg *msg)
 
void enqueueWorkA2(LocalWorkMsg *msg)
 
NAMD_HOST_DEVICE Position apply_transform(Position data, const Transform &t) const
 
void createHomePatches(void)
 
void NAMD_bug(const char *err_msg)
 
static NAMD_HOST_DEVICE int offset_c(int i)
 
void enqueueImpropers(LocalWorkMsg *msg)
 
BigReal min_c(int pid) const
 
static int eventMachineProgress
 
Real langevin_param(int atomnum) const
 
Index atomvdwtype(int anum) const
 
int numaway_c(void) const
 
void enqueueLCPO(LocalWorkMsg *msg)
 
int oneOrTwoAwayNeighbors(int pid, PatchID *neighbor_ids, PatchID *downstream_ids=0, int *transform_ids=0)
 
int index_b(int pid) const
 
Bool staticAtomAssignment
 
pe_sortop_coord_y(ScaledPosition *s)
 
Bool replicaUniformPatchGrids
 
bool operator()(int a, int b) const
 
void finishCUDA(LocalWorkMsg *msg)
 
void get_extremes(ScaledPosition &xmin, ScaledPosition &xmax) const
 
int numaway_a(void) const
 
NAMD_HOST_DEVICE int a_p() const
 
NAMD_HOST_DEVICE Vector a_r() const
 
NAMD_HOST_DEVICE Vector b_r() const
 
void setNewNode(ComputeID cid, NodeID node)
 
virtual void finishPatch(int)
 
NAMD_HOST_DEVICE Position nearest(Position data, ScaledPosition ref) const
 
void NAMD_die(const char *err_msg)
 
void enqueueCUDAP3(LocalWorkMsg *msg)
 
static int * peDiffuseOrderingIndex
 
BigReal min_a(int pid) const
 
NAMD_HOST_DEVICE Vector c_r() const
 
Real atommass(int anum) const
 
static int compare_bit_reversed(int a, int b)
 
void enqueueWorkA1(LocalWorkMsg *msg)
 
Bool pressureProfileEwaldOn
 
std::vector< std::string > split(const std::string &text, std::string delimiter)
 
static int * peDiffuseOrdering
 
void makePatches(ScaledPosition xmin, ScaledPosition xmax, const Lattice &lattice, BigReal patchSize, double maxNumPatches, int staticAtomAssignment, int replicaUniformPatchGrids, int lcpo, int asplit, int bsplit, int csplit)
 
int basenode(int pid) const
 
int index_c(int pid) const
 
unsigned char get_fep_type(int anum) const
 
static int peOrderingInit
 
void find_extremes(const Lattice &, BigReal frac=1.0)
 
void saveComputeMapChanges(int, CkGroupID)
 
int32 status
Atom status bit fields defined in structures.h. 
 
void finishCUDAPatch(FinishWorkMsg *msg)
 
void savePatchMap(PatchMapMsg *msg)
 
void topo_getargs(char **argv)
 
static int * peCompactOrderingIndex
 
static void buildNodeAwarePeOrdering(void)
 
patch_sortop_curve_a(PatchMap *m)
 
int pressureProfileAtomTypes
 
int atomsInMigrationGroup
 
void newPid(ComputeID cid, int pid, int trans=13)
 
static void send_setDeviceLDBParams(int dt, int hs, int sp1, int pp1, int pp2)
 
static NAMD_HOST_DEVICE int offset_a(int i)
 
BigReal max_b(int pid) const
 
void enqueueSelfA2(LocalWorkMsg *msg)
 
static ComputeMap * Object()
 
static void build_ordering(void *)
 
int numaway_b(void) const
 
void distributeHomePatches(void)
 
void assignNode(PatchID, NodeID)
 
patch_sortop_curve_c(PatchMap *m)
 
BigReal max_c(int pid) const
 
void enqueueSelfB2(LocalWorkMsg *msg)
 
int gridsize_b(void) const
 
int numPatchesOnNode(int node)
 
MOStream * put(char data)
 
static void send_initHostDeviceLDB()
 
FullAtomList * createAtomLists(const char *basename=0)
 
#define SET_PRIORITY(MSG, SEQ, PRIO)
 
pe_sortop_coord_x(ScaledPosition *s)
 
void enqueueDihedrals(LocalWorkMsg *msg)
 
Bool is_atom_fixed(int atomnum) const
 
void finishMIC(LocalWorkMsg *msg)
 
StringList * find(const char *name) const
 
void contributeHostDeviceLDB(int peSetLen, int *peSet)
 
void pack(char *buf, int size)
 
int isOutputProcessor(int pe)
 
void get_vdw_params(Real *sigma, Real *epsilon, Real *sigma14, Real *epsilon14, Index index)
 
void doneSaveComputeMap(CkReductionMsg *)
 
void unpack(MIStream *msg)
 
Real rigid_bond_length(int atomnum) const
 
__thread DeviceCUDA * deviceCUDA
 
void get_all_positions(Vector *)
 
BigReal min_b(int pid) const
 
pe_sortop_topo(TopoManagerWrapper &t, int *d)
 
void coords(int pe, int *crds)
 
void enqueueWorkB3(LocalWorkMsg *msg)
 
NAMD_HOST_DEVICE Vector origin() const
 
bool operator()(int a, int b) const
 
Bool noPatchesOnOutputPEs
 
int * sortAndSplit(int *node_begin, int *node_end, int splitdim)
 
void sortAtomsForPatches(int *order, int *breaks, const FullAtom *atoms, int nmgrps, int natoms, int ni, int nj, int nk)
 
bool operator()(int p1, int p2) const
 
void assignNodeToPatch(void)
 
int getPatchesInOctet(int pid, PatchID *pids, int *transform_ids=0)
 
NodeID newNode(ComputeID cid)