23 #define ST_NODE_LOAD            0.005    24 #define PROXY_LOAD              0.001    25 #define COMPUTE_LOAD            0.00005    28       processorInfo *processorArray, 
int nComps, 
int nPatches, 
int nPes)
    49   const int endGroup = beginGroup + 
P;
    50 #define INGROUP(PROC) ((PROC) >= beginGroup && (PROC) < endGroup)    66    for (i=0; i<nPatches; i++) {
    81       computeArray[i].processor = -1;
    93    float *temploads = 
new float[
P];
   142    if ( 
P == CkNumPes() ) {
   144    if ( 
P != CkNumPes() ) {
   147      while ( maxinw < CkNumPes() ) {
   152              std::setw(w) << std::right << 
processors[0].
Id << 
"-" <<
   156    iout << 
" Reverting to original mapping\n" << 
endi;
   168    const int endGroup = beginGroup + 
P;
   177   if ( 
P != CkNumPes() ) {
   249    iout << 
iINFO << 
"Strategy not implemented for the base class.\n" << 
"\n";
   276    int numSelfComputes, numPairComputes, numBgSelfComputes, numBgPairComputes;
   281     numBgSelfComputes = 0;
   282     numBgPairComputes = 0;
   302     int numBgComputes = numBgPairComputes + numBgSelfComputes;
   370    int numSelfComputes, numPairComputes;
   406 #if COMPUTE_CORRECTION   456    iout << 
"Assign " << c->
Id << 
" patches " << c->patch1 << 
" " << c->patch2
   457         << 
" load " << c->
load << 
" to " << p->
Id << 
" new load "   459         << 
" nPatches " << nPatches << 
" nProxies " << nProxies;
   460    if ( nPatches + nProxies < 2 ) 
iout << 
" addProxy";
   461    if ( badForComm ) 
iout << 
" badForComm";
   469       iout << 
iINFO << 
"ERROR: Rebalancer tried to deAssign an object that is not on the processor.\n" << 
endi;
   473    double temp_load = 0.0;
   480    CmiAssert( fabs(temp_load - p->
load) < 0.001 );
   549   if ( c->
load + p->
load < thresholdLoad) {
   550     int nPatches, nProxies, badForComm;
   555     pcpair *pair = &grid[nPatches][nProxies][badForComm];
   565       double oldval = pair->
p->
load - pair->
c->
load;
   569       if (newval < oldval) {
   580    int no_new_proxies = 0;  
   597    iout << 
"\nBefore Refinement Summary" << 
"\n";
   626         if ( ! no_new_proxies ) {
   640 #define REASSIGN(GRID) if (GRID.c) { \   641            deAssign(GRID.c, donor); \   642            assign(GRID.c, GRID.p); \   667                                 proxiesOn.iterator((
Iterator *)&nextProc);
   671                                 proxiesOn.next((
Iterator*)&nextProc);
   677                                 proxiesOn.iterator((
Iterator *)&nextProc);
   681                                 proxiesOn.next((
Iterator*)&nextProc);
   694         else if ( no_new_proxies ) { finish = 0; 
break; }
   703           if (donor->
load > thresholdLoad)
   711          << 
"ERROR: Rebalancer::refine() algorithm is broken.\n" << 
endi;
   757         else { finish = 0; 
break; }
   759         if (donor->
load > thresholdLoad)
   767    iout << 
"After Refinement Summary" << 
"\n";
   771      iout << 
iINFO << 
"Refine: No solution found for overLoad = "    776    delete heavyProcessors;
   777    delete lightProcessors;
   794   iout << 
"******** Processors with background load > average load ********" << 
"\n";
   797   int numOverloaded = 0;
   798   for (
int ip=0; ip<
P; ip++) {
   806   if ( numOverloaded ) {
   808       << 
" processors are overloaded due to high background load.\n" << 
endi;
   811   iout << 
"******** Processor List Ends ********" << 
"\n\n";
   814   const double overloadStep = 0.01;
   815   const double overloadStart = overload_start;       
   816   double dCurOverload = max / avg;
   819   int maxOverload = (int)((dCurOverload - overloadStart)/overloadStep + 1);
   820   double dMinOverload = minOverload * overloadStep + overloadStart;
   821   double dMaxOverload = maxOverload * overloadStep + overloadStart;
   825        << 
"Balancing from " << minOverload << 
" = " << dMinOverload 
   826        << 
" to " << maxOverload << 
"=" << dMaxOverload 
   827        << 
" dCurOverload=" << dCurOverload << 
" max=" << max << 
" avg=" << avg
   840       iout << 
iINFO << 
"ERROR: Could not refine at max overload\n" << 
endi;
   846   while (!refineDone) {
   847     if (maxOverload - minOverload <= 1)
   850       curOverload = (maxOverload + minOverload ) / 2;
   852       overLoad = curOverload * overloadStep + overloadStart;
   854       iout << 
iINFO << 
"Testing curOverload " << curOverload 
   856            << minOverload << 
", " << maxOverload
   860         maxOverload = curOverload;
   862         minOverload = curOverload;
   870   iout << 
iINFO << 
"ready to print result \n" << 
"\n";
   877    int i, total = 0, numBytes = 0;
   880    int maxpatchproxies = 0;
   881    double avgBgLoad =0.0;
   883    for (i=0; i<
P; i++) {
   887       if ( nproxies > maxproxies ) maxproxies = nproxies;
   897          if ( myProxies > maxpatchproxies ) maxpatchproxies = myProxies;
   908   if ( 
P == CkNumPes() ) {
   910    if ( 
P != CkNumPes() ) {
   913      while ( maxinw < CkNumPes() ) {
   918              std::setw(w) << std::right << 
processors[0].
Id << 
"-" <<
   923      << 
" MAX " << max << 
"  PROXIES: TOTAL " << total << 
" MAXPE " << 
   924      maxproxies << 
" MAXPATCH " << maxpatchproxies << 
" " << 
strategyName    929    if ( 
P != CkNumPes() ) {  
   932        NAMD_bug(
"Rebalancer::printLoads(0) called with hybrid balancer.");
   935        if ( 
collMsg ) 
NAMD_bug(
"Rebalancer::printLoads(1) collMsg not null.");
   969        NAMD_bug(
"Rebalancer::printLoads() called with unknown phase.");
   984    for (i=1; i<
P; i++) {
   995    iout << 
iINFO << 
"  min = " << min << 
" processor " << min_proc << 
"\n";
   996    iout << 
iINFO << 
"  max = " << max << 
" processor " << max_proc << 
"\n";
   997    iout << 
iINFO << 
"  total = " << total << 
" average = " << total/
P << 
"\n";
  1008    for (i=0; i<
P; i++) {
  1015      CmiPrintf(
"Warning: no processors available for load balancing!\n");
  1033    double bgtotal = 0.;
  1034    for (i=0; i<
P; i++) {
  1042    for (i=0; i<
P; i++) {
  1045         if ( bgload < bgavg ) {
  1075            int *nPatches, 
int *nProxies, 
int *isBadForCommunication)
  1079    int patch_count = 0;
  1080    int proxy_count = 0;
  1083   const int endGroup = beginGroup + 
P;
  1107    *nPatches = patch_count;
  1108    *nProxies = proxy_count;
  1110   if ( isBadForCommunication ) {  
  1113    if ( patch_count + proxy_count < 2 ) {
  1118        if ( proxiesPerPeLimit < 6 ) proxiesPerPeLimit = 6;
  1123        if ( proxiesPerPatchLimit < 6 ) proxiesPerPatchLimit = 6;
  1125        if ( ! bad && ! pa1_avail ) {
  1129            index = realPe - beginGroup;
  1131            if (
processors[index].backgroundLoad > bgLoadLimit) bad = 1;
  1136        if ( ! bad && ! pa2_avail ) {
  1140            index = realPe - beginGroup;
  1142            if ( 
processors[index].backgroundLoad > bgLoadLimit) bad = 1;
  1150    *isBadForCommunication = bad;
  1158 #ifndef NODEAWARE_PROXY_SPANNINGTREE  1187   CkPrintf(
"Done intialising\n");
  1188 #ifdef NODEAWARE_PROXY_SPANNINGTREE 
void sendCollectLoads(CollectLoadsMsg *)
 
std::ostream & iINFO(std::ostream &s)
 
static ProxyMgr * Object()
 
static PatchMap * Object()
 
CollectLoadsMsg * collMsg
 
void createSpanningTree()
 
void assign(computeInfo *c, processorInfo *pRec)
 
std::ostream & endi(std::ostream &s)
 
std::ostream & iWARN(std::ostream &s)
 
void insert(InfoRecord *)
 
void refine_togrid(pcgrid &grid, double thresholdLoad, processorInfo *p, computeInfo *c)
 
processorInfo * processors
 
void printLoads(int phase=0)
 
void numAvailable(computeInfo *c, processorInfo *p, int *nPatches, int *nProxies, int *isBadForCommunication)
 
static Units next(Units u)
 
void deAssign(computeInfo *c, processorInfo *pRec)
 
InfoRecord * next(Iterator *)
 
void multirefine(double overload_start=1.02)
 
void NAMD_bug(const char *err_msg)
 
maxHeap * computeBgSelfHeap
 
maxHeap * computeSelfHeap
 
maxHeap * computePairHeap
 
void increment(int pe, int patch)
 
void buildSpanningTree0()
 
void adjustBackgroundLoadAndComputeAverage()
 
static LdbCoordinator * Object()
 
void decrement(int pe, int patch)
 
maxHeap * computeBgPairHeap
 
int isAvailableOn(patchInfo *patch, processorInfo *p)
 
void unchecked_insert(InfoRecord *)
 
int getVal(int pe, int patch)
 
InfoRecord * iterator(Iterator *)
 
Rebalancer(computeInfo *computeArray, patchInfo *patchArray, processorInfo *processorArray, int nComps, int nPatches, int nPes)
 
const char * strategyName