18 #include "ComputeMgr.decl.h"    32 #define MSM_REDUCE_GRID    36 #define MSM_GRID_CUTOFF_DECOMP    40 #define MSM_SKIP_TOO_DISTANT_BLOCKS    46 #define MSM_SKIP_BEYOND_SPHERE    50 #define MSM_NODE_MAPPING    53 #define MSM_NODE_MAPPING_STATS    54 #undef MSM_NODE_MAPPING_STATS    61 #define MSM_FOLD_FACTOR    76 #define MSM_FIXED_SIZE_GRID_MSG    77 #undef MSM_FIXED_SIZE_GRID_MSG    85 #define DEBUG_MEMORY_ALIGNMENT    86 #undef DEBUG_MEMORY_ALIGNMENT   111 class GridMsg : 
public CkMcastBaseMsg, 
public CMessage_GridMsg {
   162         NAMD_die(
"Exceeded maximum number of MSM levels\n");
   185         NAMD_die(
"Exceeded maximum number of MSM levels\n");
   188           nlevels*
sizeof(CProxy_MsmC1HermiteBlock));
   195           nlevels*
sizeof(CProxy_MsmC1HermiteBlock));
   205     void put(
const CProxy_MsmGridCutoff *p) {
   210     void get(CProxy_MsmGridCutoff *p) {
   217   public CMessage_MsmC1HermiteGridCutoffProxyMsg
   223     void put(
const CProxy_MsmC1HermiteGridCutoff *p) {
   225           sizeof(CProxy_MsmC1HermiteGridCutoff));
   229     void get(CProxy_MsmC1HermiteGridCutoff *p) {
   231           sizeof(CProxy_MsmC1HermiteGridCutoff));
   246   public CkMcastBaseMsg, 
public CMessage_MsmGridCutoffSetupMsg
   253         const CProxyElement_MsmBlock *q 
   260         CProxyElement_MsmBlock *q 
   268   public CkMcastBaseMsg, 
public CMessage_MsmC1HermiteGridCutoffSetupMsg
   275         const CProxyElement_MsmC1HermiteBlock *q 
   278           sizeof(CProxyElement_MsmC1HermiteBlock));
   283         CProxyElement_MsmC1HermiteBlock *q 
   286           sizeof(CProxyElement_MsmC1HermiteBlock));
   297       for (
int i = 0;  i < 
MAX;  i++)  
timing[i] = 0;
   299     void done(
double tm[], 
int n) {
   300       for (
int i = 0;  i < 
MAX;  i++)  
timing[i] = tm[i];
   304       CkPrintf(
"MSM timings:\n");
   310       CkPrintf(
"   communication   %8.6f sec\n", 
timing[
COMM]);
   332       CkPrintf(
"MSM profiling:\n");
   333       CkPrintf(
"   total executions of inner loop:   %d\n", sum);
   334       for (
int i = 0;  i < 
MAX;  i++) {
   335         CkPrintf(
"   executing %d times:   %d  (%5.2f%%)\n",
   407   void subtractTiming() {
   411     if (++cntTiming >= numTiming) {
   412       CkCallback cb(CkReductionTarget(
MsmTimer, done), msmTimer);
   414           CkReduction::sum_double, cb);
   421   void initProfiling() {
   426   void addProfiling() {
   430   void subtractProfiling() {
   433   void doneProfiling() {
   434     if (++cntProfiling >= numProfiling) {
   435       CkCallback cb(CkReductionTarget(
MsmProfiler, done), msmProfiler);
   437           CkReduction::sum_int, cb);
   452   static inline int sign(
int n) {
   453     return (n < 0 ? -1 : (n > 0 ? 1 : 0));
   458       int& ia, 
int& ib, 
int isperiodic);
   483 #ifdef MSM_NODE_MAPPING   489     for (
int l = 0;  l < level;  l++) {
   496     const float scalingFactor = 3;
   503       gn = 
map.
gc[0].extent();
   505     const int volumeFullCutoff = (
map.
bsx[0] + gn.
i - 1) *
   508     int volumeBlock = n.
i * n.
j * n.
k;
   510     int volumeCutoff = nc.
i * nc.
j * nc.
k;
   511     return( scalingFactor * (
float(volumeBlock) / volumeFullBlock) *
   512         (
float(volumeCutoff) / volumeFullCutoff) );
   517     int volumeBlock = n.
i * n.
j * n.
k;
   518     return( 
float(volumeBlock) / volumeFullBlock );
   542       for (
int n = 0;  n < 
VMAX;  n++) { 
virial[n] = 0; }
   549       for (
int k = ka;  k <= kb;  k++) {
   550         for (
int j = ja;  j <= jb;  j++) {
   551           for (
int i = ia;  i <= ib;  i++) {
   573   CProxy_MsmTimer msmTimer;
   577   CkCallback *cbTiming;
   581   CProxy_MsmProfiler msmProfiler;
   585   CkCallback *cbProfiling;
   671         g = 1 + (s-1)*(-1./2 + (s-1)*(3./8));
   672         dg = (2*r_a)*(-1./2 + (s-1)*(3./4));
   675         g = 1 + (s-1)*(-1./2 + (s-1)*(3./8 + (s-1)*(-5./16)));
   676         dg = (2*r_a)*(-1./2 + (s-1)*(3./4 + (s-1)*(-15./16)));
   679         g = 1 + (s-1)*(-1./2 + (s-1)*(3./8 + (s-1)*(-5./16
   680                 + (s-1)*(35./128))));
   681         dg = (2*r_a)*(-1./2 + (s-1)*(3./4 + (s-1)*(-15./16
   685         g = 1 + (s-1)*(-1./2 + (s-1)*(3./8 + (s-1)*(-5./16
   686                 + (s-1)*(35./128 + (s-1)*(-63./256)))));
   687         dg = (2*r_a)*(-1./2 + (s-1)*(3./4 + (s-1)*(-15./16
   688                 + (s-1)*(35./32 + (s-1)*(-315./256)))));
   691         g = 1 + (s-1)*(-1./2 + (s-1)*(3./8 + (s-1)*(-5./16
   692                 + (s-1)*(35./128 + (s-1)*(-63./256
   693                     + (s-1)*(231./1024))))));
   694         dg = (2*r_a)*(-1./2 + (s-1)*(3./4 + (s-1)*(-15./16
   695                 + (s-1)*(35./32 + (s-1)*(-315./256
   696                     + (s-1)*(693./512))))));
   699         g = 1 + (s-1)*(-1./2 + (s-1)*(3./8 + (s-1)*(-5./16
   700             + (s-1)*(35./128 + (s-1)*(-63./256
   701                 + (s-1)*(231./1024 + (s-1)*(-429./2048)))))));
   702         dg = (2*r_a)*(-1./2 + (s-1)*(3./4 + (s-1)*(-15./16
   703                 + (s-1)*(35./32 + (s-1)*(-315./256
   704                     + (s-1)*(693./512 + (s-1)*(-3003./2048)))))));
   707         g = 1 + (s-1)*(-1./2 + (s-1)*(3./8 + (s-1)*(-5./16
   708                 + (s-1)*(35./128 + (s-1)*(-63./256
   709                     + (s-1)*(231./1024 + (s-1)*(-429./2048
   710                         + (s-1)*(6435./32768))))))));
   711         dg = (2*r_a)*(-1./2 + (s-1)*(3./4 + (s-1)*(-15./16
   712                 + (s-1)*(35./32 + (s-1)*(-315./256
   713                     + (s-1)*(693./512 + (s-1)*(-3003./2048
   714                         + (s-1)*(6435./4096))))))));
   717         g = 1 + (s-1)*(-3 + (s-1)*(6));
   718         dg = (2*r_a)*(-3 + (s-1)*(12));
   721         g = 1 + (s-1)*(-3 + (s-1)*(6 + (s-1)*(-10)));
   722         dg = (2*r_a)*(-3 + (s-1)*(12 + (s-1)*(-30)));
   725         g = 1 + (s-1)*(-3 + (s-1)*(6 + (s-1)*(-10 + (s-1)*(15))));
   726         dg = (2*r_a)*(-3 + (s-1)*(12 + (s-1)*(-30 + (s-1)*(60))));
   729         g = 1 + (s-1)*(-3 + (s-1)*(6 + (s-1)*(-10
   730                 + (s-1)*(15 + (s-1)*(-21)))));
   731         dg = (2*r_a)*(-3 + (s-1)*(12 + (s-1)*(-30
   732                 + (s-1)*(60 + (s-1)*(-105)))));
   735         g = 1 + (s-1)*(-3 + (s-1)*(6 + (s-1)*(-10
   736                 + (s-1)*(15 + (s-1)*(-21 + (s-1)*(28))))));
   737         dg = (2*r_a)*(-3 + (s-1)*(12 + (s-1)*(-30
   738                 + (s-1)*(60 + (s-1)*(-105 + (s-1)*(168))))));
   741         g = 1 + (s-1)*(-3 + (s-1)*(6 + (s-1)*(-10
   742                 + (s-1)*(15 + (s-1)*(-21 + (s-1)*(28
   744         dg = (2*r_a)*(-3 + (s-1)*(12 + (s-1)*(-30
   745                 + (s-1)*(60 + (s-1)*(-105 + (s-1)*(168
   746                       + (s-1)*(-252)))))));
   749         g = 1 + (s-1)*(-3 + (s-1)*(6 + (s-1)*(-10
   750                 + (s-1)*(15 + (s-1)*(-21 + (s-1)*(28
   751                       + (s-1)*(-36 + (s-1)*(45))))))));
   752         dg = (2*r_a)*(-3 + (s-1)*(12 + (s-1)*(-30
   753                 + (s-1)*(60 + (s-1)*(-105 + (s-1)*(168
   754                       + (s-1)*(-252 + (s-1)*(360))))))));
   764         phi[0] = 0.5f * (1 - t) * (2 - t) * (2 - t);
   766         phi[1] = (1 - t) * (1 + t - 1.5f * t * t);
   768         phi[2] = (1 + t) * (1 - t - 1.5f * t * t);
   770         phi[3] = 0.5f * (1 + t) * (2 + t) * (2 + t);
   773         phi[0] = (1.f/24) * (1-t) * (2-t) * (3-t) * (3-t) * (4-t);
   775         phi[1] = (1-t) * (2-t) * (3-t) * ((1.f/6)
   776             + t * (0.375f - (5.f/24)*t));
   778         phi[2] = (1-t*t) * (2-t) * (0.5f + t * (0.25f - (5.f/12)*t));
   780         phi[3] = (1-t*t) * (2+t) * (0.5f - t * (0.25f + (5.f/12)*t));
   782         phi[4] = (1+t) * (2+t) * (3+t) * ((1.f/6)
   783             - t * (0.375f + (5.f/24)*t));
   785         phi[5] = (1.f/24) * (1+t) * (2+t) * (3+t) * (3+t) * (4+t);
   788         phi[0] = (1.f/24) * (3-t) * (3-t) * (3-t) * (t-2) * (5*t-8);
   790         phi[1] = (-1.f/24) * (2-t) * (t-1) * (-48+t*(153+t*(-114+t*25)));
   792         phi[2] = (1.f/12) * (1-t) * (12+t*(12+t*(-3+t*(-38+t*25))));
   794         phi[3] = (1.f/12) * (1+t) * (12+t*(-12+t*(-3+t*(38+t*25))));
   796         phi[4] = (-1.f/24) * (2+t) * (t+1) * (48+t*(153+t*(114+t*25)));
   798         phi[5] = (1.f/24) * (3+t) * (3+t) * (3+t) * (t+2) * (5*t+8);
   801         phi[0] = (-1.f/720)*(t-1)*(t-2)*(t-3)*(t-4)*(t-4)*(t-5)*(t-6);
   803         phi[1] = (1.f/720)*(t-1)*(t-2)*(t-3)*(t-4)*(t-5)*(-6+t*(-20+7*t));
   805         phi[2] = (-1.f/240)*(t*t-1)*(t-2)*(t-3)*(t-4)*(-10+t*(-12+7*t));
   807         phi[3] = (1.f/144)*(t*t-1)*(t*t-4)*(t-3)*(-12+t*(-4+7*t));
   809         phi[4] = (-1.f/144)*(t*t-1)*(t*t-4)*(t+3)*(-12+t*(4+7*t));
   811         phi[5] = (1.f/240)*(t*t-1)*(t+2)*(t+3)*(t+4)*(-10+t*(12+7*t));
   813         phi[6] = (-1.f/720)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(-6+t*(20+7*t));
   815         phi[7] = (1.f/720)*(t+1)*(t+2)*(t+3)*(t+4)*(t+4)*(t+5)*(t+6);
   818         phi[0] = (3632.f/5) + t*((-7456.f/5) + t*((58786.f/45) + t*(-633
   819                 + t*((26383.f/144) + t*((-22807.f/720) + t*((727.f/240)
   820                       + t*(-89.f/720)))))));
   822         phi[1] = -440 + t*((25949.f/20) + t*((-117131.f/72) + t*((2247.f/2)
   823                 + t*((-66437.f/144) + t*((81109.f/720) + t*((-727.f/48)
   824                       + t*(623.f/720)))))));
   826         phi[2] = (138.f/5) + t*((-8617.f/60) + t*((12873.f/40) + t*((-791.f/2)
   827                 + t*((4557.f/16) + t*((-9583.f/80) + t*((2181.f/80)
   828                       + t*(-623.f/240)))))));
   830         phi[3] = 1 + t*t*((-49.f/36) + t*t*((-959.f/144) + t*((2569.f/144)
   831                 + t*((-727.f/48) + t*(623.f/144)))));
   833         phi[4] = 1 + t*t*((-49.f/36) + t*t*((-959.f/144) + t*((-2569.f/144)
   834                 + t*((-727.f/48) + t*(-623.f/144)))));
   836         phi[5] = (138.f/5) + t*((8617.f/60) + t*((12873.f/40) + t*((791.f/2)
   837                 + t*((4557.f/16) + t*((9583.f/80) + t*((2181.f/80)
   838                       + t*(623.f/240)))))));
   840         phi[6] = -440 + t*((-25949.f/20) + t*((-117131.f/72) + t*((-2247.f/2)
   841                 + t*((-66437.f/144) + t*((-81109.f/720) + t*((-727.f/48)
   842                       + t*(-623.f/720)))))));
   844         phi[7] = (3632.f/5) + t*((7456.f/5) + t*((58786.f/45) + t*(633
   845                 + t*((26383.f/144) + t*((22807.f/720) + t*((727.f/240)
   846                       + t*(89.f/720)))))));
   849         phi[0] = (-1.f/40320)*(t-8)*(t-7)*(t-6)*(t-5)*(t-5)*(t-4)*(t-3)*
   852         phi[1] = (1.f/40320)*(t-7)*(t-6)*(t-5)*(t-4)*(t-3)*(t-2)*(t-1)*
   855         phi[2] = (-1.f/10080)*(t-6)*(t-5)*(t-4)*(t-3)*(t-2)*(t-1)*(t+1)*
   858         phi[3] = (1.f/1440)*(t-5)*(t-4)*(t-3)*(t-2)*(t-1)*(t+1)*(t+2)*
   861         phi[4] = (-1.f/2880)*(t-4)*(t-3)*(t-2)*(t-1)*(t+1)*(t+2)*(t+3)*
   864         phi[5] = (1.f/2880)*(t-3)*(t-2)*(t-1)*(t+1)*(t+2)*(t+3)*(t+4)*
   867         phi[6] = (-1.f/1440)*(t-2)*(t-1)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*
   870         phi[7] = (1.f/10080)*(t-1)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(t+6)*
   873         phi[8] = (-1.f/40320)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(t+6)*(t+7)*
   876         phi[9] = (1.f/40320)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(t+5)*(t+6)*
   883         Tphi[0] = 439375./7+T*(-64188125./504+T*(231125375./2016
   884               +T*(-17306975./288+T*(7761805./384+T*(-2895587./640
   885                     +T*(129391./192+T*(-259715./4032+T*(28909./8064
   886                           +T*(-3569./40320)))))))));
   888         Tphi[1] = -56375+T*(8314091./56+T*(-49901303./288+T*(3763529./32
   889                 +T*(-19648027./384+T*(9469163./640+T*(-545977./192
   890                       +T*(156927./448+T*(-28909./1152
   891                           +T*(3569./4480)))))))));
   893         Tphi[2] = 68776./7+T*(-1038011./28+T*(31157515./504+T*(-956669./16
   894                 +T*(3548009./96+T*(-2422263./160+T*(197255./48
   895                       +T*(-19959./28+T*(144545./2016
   896                           +T*(-3569./1120)))))))));
   898         Tphi[3] = -154+T*(12757./12+T*(-230123./72+T*(264481./48
   899                 +T*(-576499./96+T*(686147./160+T*(-96277./48
   900                       +T*(14221./24+T*(-28909./288+T*(3569./480)))))))));
   902         Tphi[4] = 1+T*T*(-205./144+T*T*(91./192+T*(-6181./320
   903                 +T*(6337./96+T*(-2745./32+T*(28909./576
   904                       +T*(-3569./320)))))));
   906         Tphi[5] = 1+T*T*(-205./144+T*T*(91./192+T*(6181./320
   907                 +T*(6337./96+T*(2745./32+T*(28909./576
   908                       +T*(3569./320)))))));
   910         Tphi[6] = -154+T*(-12757./12+T*(-230123./72+T*(-264481./48
   911                 +T*(-576499./96+T*(-686147./160+T*(-96277./48
   912                       +T*(-14221./24+T*(-28909./288+T*(-3569./480)))))))));
   914         Tphi[7] = 68776./7+T*(1038011./28+T*(31157515./504+T*(956669./16
   915                 +T*(3548009./96+T*(2422263./160+T*(197255./48
   916                       +T*(19959./28+T*(144545./2016+T*(3569./1120)))))))));
   918         Tphi[8] = -56375+T*(-8314091./56+T*(-49901303./288+T*(-3763529./32
   919                 +T*(-19648027./384+T*(-9469163./640+T*(-545977./192
   920                       +T*(-156927./448+T*(-28909./1152
   921                           +T*(-3569./4480)))))))));
   923         Tphi[9] = 439375./7+T*(64188125./504+T*(231125375./2016
   924               +T*(17306975./288+T*(7761805./384+T*(2895587./640
   925                     +T*(129391./192+T*(259715./4032+T*(28909./8064
   926                           +T*(3569./40320)))))))));
   927         for (
int i=0;  i < 10;  i++) {
   928           phi[i] = 
Float(Tphi[i]);
   933         NAMD_die(
"Unknown MSM approximation.");
   940         phi[0] = 0.5f * (1 - t) * (2 - t) * (2 - t);
   941         dphi[0] = (1.5f * t - 2) * (2 - t) * h_1;
   943         phi[1] = (1 - t) * (1 + t - 1.5f * t * t);
   944         dphi[1] = (-5 + 4.5f * t) * t * h_1;
   946         phi[2] = (1 + t) * (1 - t - 1.5f * t * t);
   947         dphi[2] = (-5 - 4.5f * t) * t * h_1;
   949         phi[3] = 0.5f * (1 + t) * (2 + t) * (2 + t);
   950         dphi[3] = (1.5f * t + 2) * (2 + t) * h_1;
   953         phi[0] = (1.f/24) * (1-t) * (2-t) * (3-t) * (3-t) * (4-t);
   954         dphi[0] = ((-1.f/24) * ((3-t) * (3-t) * (14 + t * (-14 + 3*t))
   955               + 2 * (1-t) * (2-t) * (3-t) * (4-t))) * h_1;
   957         phi[1] = (1-t) * (2-t) * (3-t) * ((1.f/6)
   958             + t * (0.375f - (5.f/24)*t));
   959         dphi[1] = (-((1.f/6) + t * (0.375f - (5.f/24)*t)) *
   960             (11 + t * (-12 + 3*t)) + (1-t) * (2-t) * (3-t) *
   961             (0.375f - (5.f/12)*t)) * h_1;
   963         phi[2] = (1-t*t) * (2-t) * (0.5f + t * (0.25f - (5.f/12)*t));
   964         dphi[2] = (-(0.5f + t * (0.25f - (5.f/12)*t)) * (1 + t * (4 - 3*t))
   965             + (1-t*t) * (2-t) * (0.25f - (5.f/6)*t)) * h_1;
   967         phi[3] = (1-t*t) * (2+t) * (0.5f - t * (0.25f + (5.f/12)*t));
   968         dphi[3] = ((0.5f + t * (-0.25f - (5.f/12)*t)) * (1 + t * (-4 - 3*t))
   969             - (1-t*t) * (2+t) * (0.25f + (5.f/6)*t)) * h_1;
   971         phi[4] = (1+t) * (2+t) * (3+t) * ((1.f/6)
   972             - t * (0.375f + (5.f/24)*t));
   973         dphi[4] = (((1.f/6) + t * (-0.375f - (5.f/24)*t)) *
   974             (11 + t * (12 + 3*t)) - (1+t) * (2+t) * (3+t) *
   975             (0.375f + (5.f/12)*t)) * h_1;
   977         phi[5] = (1.f/24) * (1+t) * (2+t) * (3+t) * (3+t) * (4+t);
   978         dphi[5] = ((1.f/24) * ((3+t) * (3+t) * (14 + t * (14 + 3*t))
   979               + 2 * (1+t) * (2+t) * (3+t) * (4+t))) * h_1;
   982         phi[0] = (1.f/24) * (3-t) * (3-t) * (3-t) * (t-2) * (5*t-8);
   983         dphi[0] = ((1.f/24) * (3-t) * (3-t) * ((3-t)*(5*t-8)
   984               - 3*(t-2)*(5*t-8) + 5*(t-2)*(3-t))) * h_1;
   986         phi[1] = (-1.f/24) * (2-t) * (t-1) * (-48+t*(153+t*(-114+t*25)));
   987         dphi[1] = ((-1.f/24) * ((2-t)*(-48+t*(153+t*(-114+t*25)))
   988               - (t-1)* (-48+t*(153+t*(-114+t*25)))
   989               + (2-t)*(t-1)*(153+t*(-228+t*75)))) * h_1;
   991         phi[2] = (1.f/12) * (1-t) * (12+t*(12+t*(-3+t*(-38+t*25))));
   992         dphi[2] = ((1.f/12) * (-(12+t*(12+t*(-3+t*(-38+t*25))))
   993               + (1-t)*(12+t*(-6+t*(-114+t*100))))) * h_1;
   995         phi[3] = (1.f/12) * (1+t) * (12+t*(-12+t*(-3+t*(38+t*25))));
   996         dphi[3] = ((1.f/12) * ((12+t*(-12+t*(-3+t*(38+t*25))))
   997               + (1+t)*(-12+t*(-6+t*(114+t*100))))) * h_1;
   999         phi[4] = (-1.f/24) * (2+t) * (t+1) * (48+t*(153+t*(114+t*25)));
  1000         dphi[4] = ((-1.f/24) * ((2+t)*(48+t*(153+t*(114+t*25)))
  1001               + (t+1)* (48+t*(153+t*(114+t*25)))
  1002               + (2+t)*(t+1)*(153+t*(228+t*75)))) * h_1;
  1004         phi[5] = (1.f/24) * (3+t) * (3+t) * (3+t) * (t+2) * (5*t+8);
  1005         dphi[5] = ((1.f/24) * (3+t) * (3+t) * ((3+t)*(5*t+8)
  1006               + 3*(t+2)*(5*t+8) + 5*(t+2)*(3+t))) * h_1;
  1009         phi[0] = (-1.f/720)*(t-1)*(t-2)*(t-3)*(t-4)*(t-4)*(t-5)*(t-6);
  1010         dphi[0] = (-1.f/720)*(t-4)*(-1944+t*(3644+t*(-2512+t*(807
  1011                   +t*(-122+t*7))))) * h_1;
  1013         phi[1] = (1.f/720)*(t-1)*(t-2)*(t-3)*(t-4)*(t-5)*(-6+t*(-20+7*t));
  1014         dphi[1] = (1.f/720)*(756+t*(-9940+t*(17724+t*(-12740+t*(4445
  1015                     +t*(-750+t*49)))))) * h_1;
  1017         phi[2] = (-1.f/240)*(t*t-1)*(t-2)*(t-3)*(t-4)*(-10+t*(-12+7*t));
  1018         dphi[2] = (-1.f/240)*(-28+t*(1260+t*(-756+t*(-1260+t*(1365
  1019                     +t*(-450+t*49)))))) * h_1;
  1021         phi[3] = (1.f/144)*(t*t-1)*(t*t-4)*(t-3)*(-12+t*(-4+7*t));
  1022         dphi[3] = (1.f/144)*t*(-560+t*(84+t*(644+t*(-175
  1023                   +t*(-150+t*49))))) * h_1;
  1025         phi[4] = (-1.f/144)*(t*t-1)*(t*t-4)*(t+3)*(-12+t*(4+7*t));
  1026         dphi[4] = (-1.f/144)*t*(560+t*(84+t*(-644+t*(-175
  1027                   +t*(150+t*49))))) * h_1;
  1029         phi[5] = (1.f/240)*(t*t-1)*(t+2)*(t+3)*(t+4)*(-10+t*(12+7*t));
  1030         dphi[5] = (1.f/240)*(-28+t*(-1260+t*(-756+t*(1260+t*(1365
  1031                     +t*(450+t*49)))))) * h_1;
  1033         phi[6] = (-1.f/720)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(-6+t*(20+7*t));
  1034         dphi[6] = (-1.f/720)*(756+t*(9940+t*(17724+t*(12740+t*(4445
  1035                     +t*(750+t*49)))))) * h_1;
  1037         phi[7] = (1.f/720)*(t+1)*(t+2)*(t+3)*(t+4)*(t+4)*(t+5)*(t+6);
  1038         dphi[7] = (1.f/720)*(t+4)*(1944+t*(3644+t*(2512+t*(807
  1039                   +t*(122+t*7))))) * h_1;
  1042         phi[0] = (3632.f/5) + t*((-7456.f/5) + t*((58786.f/45) + t*(-633
  1043                 + t*((26383.f/144) + t*((-22807.f/720) + t*((727.f/240)
  1044                       + t*(-89.f/720)))))));
  1045         dphi[0] = ((-7456.f/5) + t*((117572.f/45) + t*(-1899
  1046                 + t*((26383.f/36) + t*((-22807.f/144) + t*((727.f/40)
  1047                       + t*(-623.f/720))))))) * h_1;
  1049         phi[1] = -440 + t*((25949.f/20) + t*((-117131.f/72) + t*((2247.f/2)
  1050                 + t*((-66437.f/144) + t*((81109.f/720) + t*((-727.f/48)
  1051                       + t*(623.f/720)))))));
  1052         dphi[1] = ((25949.f/20) + t*((-117131.f/36) + t*((6741.f/2)
  1053                 + t*((-66437.f/36) + t*((81109.f/144) + t*((-727.f/8)
  1054                       + t*(4361.f/720))))))) * h_1;
  1056         phi[2] = (138.f/5) + t*((-8617.f/60) + t*((12873.f/40) + t*((-791.f/2)
  1057                 + t*((4557.f/16) + t*((-9583.f/80) + t*((2181.f/80)
  1058                       + t*(-623.f/240)))))));
  1059         dphi[2] = ((-8617.f/60) + t*((12873.f/20) + t*((-2373.f/2)
  1060                 + t*((4557.f/4) + t*((-9583.f/16) + t*((6543.f/40)
  1061                       + t*(-4361.f/240))))))) * h_1;
  1063         phi[3] = 1 + t*t*((-49.f/36) + t*t*((-959.f/144) + t*((2569.f/144)
  1064                 + t*((-727.f/48) + t*(623.f/144)))));
  1065         dphi[3] = (t*((-49.f/18) + t*t*((-959.f/36) + t*((12845.f/144)
  1066                   + t*((-727.f/8) + t*(4361.f/144)))))) * h_1;
  1068         phi[4] = 1 + t*t*((-49.f/36) + t*t*((-959.f/144) + t*((-2569.f/144)
  1069                 + t*((-727.f/48) + t*(-623.f/144)))));
  1070         dphi[4] = (t*((-49.f/18) + t*t*((-959.f/36) + t*((-12845.f/144)
  1071                   + t*((-727.f/8) + t*(-4361.f/144)))))) * h_1;
  1073         phi[5] = (138.f/5) + t*((8617.f/60) + t*((12873.f/40) + t*((791.f/2)
  1074                 + t*((4557.f/16) + t*((9583.f/80) + t*((2181.f/80)
  1075                       + t*(623.f/240)))))));
  1076         dphi[5] = ((8617.f/60) + t*((12873.f/20) + t*((2373.f/2)
  1077                 + t*((4557.f/4) + t*((9583.f/16) + t*((6543.f/40)
  1078                       + t*(4361.f/240))))))) * h_1;
  1080         phi[6] = -440 + t*((-25949.f/20) + t*((-117131.f/72) + t*((-2247.f/2)
  1081                 + t*((-66437.f/144) + t*((-81109.f/720) + t*((-727.f/48)
  1082                       + t*(-623.f/720)))))));
  1083         dphi[6] = ((-25949.f/20) + t*((-117131.f/36) + t*((-6741.f/2)
  1084                 + t*((-66437.f/36) + t*((-81109.f/144) + t*((-727.f/8)
  1085                       + t*(-4361.f/720))))))) * h_1;
  1087         phi[7] = (3632.f/5) + t*((7456.f/5) + t*((58786.f/45) + t*(633
  1088                 + t*((26383.f/144) + t*((22807.f/720) + t*((727.f/240)
  1089                       + t*(89.f/720)))))));
  1090         dphi[7] = ((7456.f/5) + t*((117572.f/45) + t*(1899
  1091                 + t*((26383.f/36) + t*((22807.f/144) + t*((727.f/40)
  1092                       + t*(623.f/720))))))) * h_1;
  1095         phi[0] = (-1.f/40320)*(t-8)*(t-7)*(t-6)*(t-5)*(t-5)*(t-4)*(t-3)*
  1097         dphi[0] = (-1.f/40320)*(t-5)*(-117648+t*(256552+t*(-221416
  1098                 +t*(99340+t*(-25261+t*(3667+t*(-283+t*9)))))))*h_1;
  1100         phi[1] = (1.f/40320)*(t-7)*(t-6)*(t-5)*(t-4)*(t-3)*(t-2)*(t-1)*
  1102         dphi[1] = (1.f/40320)*(71856+t*(-795368+t*(1569240+t*(-1357692
  1103                   +t*(634725+t*(-172116+t*(27090+t*(-2296+t*81))))))))*h_1;
  1105         phi[2] = (-1.f/10080)*(t-6)*(t-5)*(t-4)*(t-3)*(t-2)*(t-1)*(t+1)*
  1107         dphi[2] = (1.f/10080)*(3384+t*(-69080+t*(55026
  1108                 +t*(62580+t*(-99225+t*(51660+t*(-13104+t*(1640
  1109                           +t*(-81)))))))))*h_1;
  1111         phi[3] = (1.f/1440)*(t-5)*(t-4)*(t-3)*(t-2)*(t-1)*(t+1)*(t+2)*
  1113         dphi[3] = (1.f/1440)*(72+t*(-6344+t*(2070
  1114                 +t*(7644+t*(-4725+t*(-828+t*(1260+t*(-328+t*27))))))))*h_1;
  1116         phi[4] = (-1.f/2880)*(t-4)*(t-3)*(t-2)*(t-1)*(t+1)*(t+2)*(t+3)*
  1118         dphi[4] = (-1.f/2880)*t*(10792+t*(-972+t*(-12516
  1119                 +t*(2205+t*(3924+t*(-882+t*(-328+t*81)))))))*h_1;
  1121         phi[5] = (1.f/2880)*(t-3)*(t-2)*(t-1)*(t+1)*(t+2)*(t+3)*(t+4)*
  1123         dphi[5] = (1.f/2880)*t*(-10792+t*(-972+t*(12516
  1124                 +t*(2205+t*(-3924+t*(-882+t*(328+t*81)))))))*h_1;
  1126         phi[6] = (-1.f/1440)*(t-2)*(t-1)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*
  1128         dphi[6] = (1.f/1440)*(-72+t*(-6344+t*(-2070
  1129                 +t*(7644+t*(4725+t*(-828+t*(-1260+t*(-328+t*(-27)))))))))*h_1;
  1131         phi[7] = (1.f/10080)*(t-1)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(t+6)*
  1133         dphi[7] = (1.f/10080)*(-3384+t*(-69080+t*(-55026
  1134                 +t*(62580+t*(99225+t*(51660+t*(13104+t*(1640+t*81))))))))*h_1;
  1136         phi[8] = (-1.f/40320)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(t+6)*(t+7)*
  1138         dphi[8] = (-1.f/40320)*(71856+t*(795368+t*(1569240
  1139                 +t*(1357692+t*(634725+t*(172116+t*(27090+t*(2296
  1142         phi[9] = (1.f/40320)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(t+5)*(t+6)*
  1144         dphi[9] = (1.f/40320)*(t+5)*(117648+t*(256552+t*(221416
  1145                 +t*(99340+t*(25261+t*(3667+t*(283+t*9)))))))*h_1;
  1149         double Tphi[10], Tdphi[10];
  1151         Tphi[0] = 439375./7+T*(-64188125./504+T*(231125375./2016
  1152               +T*(-17306975./288+T*(7761805./384+T*(-2895587./640
  1153                     +T*(129391./192+T*(-259715./4032+T*(28909./8064
  1154                           +T*(-3569./40320)))))))));
  1155         Tdphi[0] = (-64188125./504+T*(231125375./1008
  1156               +T*(-17306975./96+T*(7761805./96+T*(-2895587./128
  1157                     +T*(129391./32+T*(-259715./576+T*(28909./1008
  1158                           +T*(-3569./4480))))))))) * h_1;
  1160         Tphi[1] = -56375+T*(8314091./56+T*(-49901303./288+T*(3763529./32
  1161                 +T*(-19648027./384+T*(9469163./640+T*(-545977./192
  1162                       +T*(156927./448+T*(-28909./1152
  1163                           +T*(3569./4480)))))))));
  1164         Tdphi[1] = (8314091./56+T*(-49901303./144+T*(11290587./32
  1165                 +T*(-19648027./96+T*(9469163./128+T*(-545977./32
  1166                       +T*(156927./64+T*(-28909./144
  1167                           +T*(32121./4480))))))))) * h_1;
  1169         Tphi[2] = 68776./7+T*(-1038011./28+T*(31157515./504+T*(-956669./16
  1170                 +T*(3548009./96+T*(-2422263./160+T*(197255./48
  1171                       +T*(-19959./28+T*(144545./2016
  1172                           +T*(-3569./1120)))))))));
  1173         Tdphi[2] = (-1038011./28+T*(31157515./252+T*(-2870007./16
  1174                 +T*(3548009./24+T*(-2422263./32+T*(197255./8
  1175                       +T*(-19959./4+T*(144545./252
  1176                           +T*(-32121./1120))))))))) * h_1;
  1178         Tphi[3] = -154+T*(12757./12+T*(-230123./72+T*(264481./48
  1179                 +T*(-576499./96+T*(686147./160+T*(-96277./48
  1180                       +T*(14221./24+T*(-28909./288+T*(3569./480)))))))));
  1181         Tdphi[3] = (12757./12+T*(-230123./36+T*(264481./16
  1182                 +T*(-576499./24+T*(686147./32+T*(-96277./8
  1183                       +T*(99547./24+T*(-28909./36
  1184                           +T*(10707./160))))))))) * h_1;
  1186         Tphi[4] = 1+T*T*(-205./144+T*T*(91./192+T*(-6181./320
  1187                 +T*(6337./96+T*(-2745./32+T*(28909./576
  1188                       +T*(-3569./320)))))));
  1189         Tdphi[4] = T*(-205./72+T*T*(91./48+T*(-6181./64
  1190                 +T*(6337./16+T*(-19215./32+T*(28909./72
  1191                       +T*(-32121./320))))))) * h_1;
  1193         Tphi[5] = 1+T*T*(-205./144+T*T*(91./192+T*(6181./320
  1194                 +T*(6337./96+T*(2745./32+T*(28909./576
  1195                       +T*(3569./320)))))));
  1196         Tdphi[5] = T*(-205./72+T*T*(91./48+T*(6181./64
  1197                 +T*(6337./16+T*(19215./32+T*(28909./72
  1198                       +T*(32121./320))))))) * h_1;
  1200         Tphi[6] = -154+T*(-12757./12+T*(-230123./72+T*(-264481./48
  1201                 +T*(-576499./96+T*(-686147./160+T*(-96277./48
  1202                       +T*(-14221./24+T*(-28909./288+T*(-3569./480)))))))));
  1203         Tdphi[6] = (-12757./12+T*(-230123./36+T*(-264481./16
  1204                 +T*(-576499./24+T*(-686147./32+T*(-96277./8
  1205                       +T*(-99547./24+T*(-28909./36
  1206                           +T*(-10707./160))))))))) * h_1;
  1208         Tphi[7] = 68776./7+T*(1038011./28+T*(31157515./504+T*(956669./16
  1209                 +T*(3548009./96+T*(2422263./160+T*(197255./48
  1210                       +T*(19959./28+T*(144545./2016+T*(3569./1120)))))))));
  1211         Tdphi[7] = (1038011./28+T*(31157515./252+T*(2870007./16
  1212                 +T*(3548009./24+T*(2422263./32+T*(197255./8
  1213                       +T*(19959./4+T*(144545./252
  1214                           +T*(32121./1120))))))))) * h_1;
  1216         Tphi[8] = -56375+T*(-8314091./56+T*(-49901303./288+T*(-3763529./32
  1217                 +T*(-19648027./384+T*(-9469163./640+T*(-545977./192
  1218                       +T*(-156927./448+T*(-28909./1152
  1219                           +T*(-3569./4480)))))))));
  1220         Tdphi[8] = (-8314091./56+T*(-49901303./144+T*(-11290587./32
  1221                 +T*(-19648027./96+T*(-9469163./128+T*(-545977./32
  1222                       +T*(-156927./64+T*(-28909./144
  1223                           +T*(-32121./4480))))))))) * h_1;
  1225         Tphi[9] = 439375./7+T*(64188125./504+T*(231125375./2016
  1226               +T*(17306975./288+T*(7761805./384+T*(2895587./640
  1227                     +T*(129391./192+T*(259715./4032+T*(28909./8064
  1228                           +T*(3569./40320)))))))));
  1229         Tdphi[9] = (64188125./504+T*(231125375./1008
  1230               +T*(17306975./96+T*(7761805./96+T*(2895587./128
  1231                     +T*(129391./32+T*(259715./576+T*(28909./1008
  1232                           +T*(3569./4480))))))))) * h_1;
  1233         for (
int i=0;  i < 10;  i++) {
  1234           phi[i] = 
Float(Tphi[i]);
  1235           dphi[i] = 
Float(Tdphi[i]);
  1240         NAMD_die(
"Unknown MSM approximation.");
  1245     phi[0] = (1 - t) * (1 - t) * (1 + 2*t);
  1246     psi[0] = h * t * (1 - t) * (1 - t);
  1248     phi[1] = (1 + t) * (1 + t) * (1 - 2*t);
  1249     psi[1] = h * t * (1 + t) * (1 + t);
  1255     phi[0] = (1 - t) * (1 - t) * (1 + 2*t);
  1256     dphi[0] = -6 * t * (1 - t) * h_1;
  1257     psi[0] = h * t * (1 - t) * (1 - t);
  1258     dpsi[0] = (1 - t) * (1 - 3*t);
  1260     phi[1] = (1 + t) * (1 + t) * (1 - 2*t);
  1261     dphi[1] = -6 * t * (1 + t) * h_1;
  1262     psi[1] = h * t * (1 + t) * (1 + t);
  1263     dpsi[1] = (1 + t) * (1 + 3*t);
  1273           pg[k++] = 1 + (s-1)*(-1./2 + (s-1)*(3./8));
  1275           pg[k++] = -1./2 + (s-1)*(3./4);
  1280           pg[k++] = 1 + (s-1)*(-1./2 + (s-1)*(3./8 + (s-1)*(-5./16)));
  1282           pg[k++] = -1./2 + (s-1)*(3./4 + (s-1)*(-15./16));
  1284           pg[k++] = 3./4 + (s-1)*(-15./8);
  1289           pg[k++] = 1 + (s-1)*(-1./2 + (s-1)*(3./8 + (s-1)*(-5./16
  1290                   + (s-1)*(35./128))));
  1292           pg[k++] = -1./2 + (s-1)*(3./4 + (s-1)*(-15./16 + (s-1)*(35./32)));
  1294           pg[k++] = 3./4 + (s-1)*(-15./8 + (s-1)*(105./32));
  1296           pg[k++] = -15./8 + (s-1)*(105./16);
  1301           pg[k++] = 1 + (s-1)*(-1./2 + (s-1)*(3./8 + (s-1)*(-5./16
  1302                   + (s-1)*(35./128 + (s-1)*(-63./256)))));
  1304           pg[k++] = -1./2 + (s-1)*(3./4 + (s-1)*(-15./16 + (s-1)*(35./32
  1305                   + (s-1)*(-315./256))));
  1307           pg[k++] = 3./4 + (s-1)*(-15./8 + (s-1)*(105./32 + (s-1)*(-315./64)));
  1309           pg[k++] = -15./8 + (s-1)*(105./16 + (s-1)*(-945./64));
  1311           pg[k++] = 105./16 + (s-1)*(-945./32);
  1316           pg[k++] = 1 + (s-1)*(-1./2 + (s-1)*(3./8 + (s-1)*(-5./16
  1317                   + (s-1)*(35./128 + (s-1)*(-63./256 + (s-1)*(231./1024))))));
  1319           pg[k++] = -1./2 + (s-1)*(3./4 + (s-1)*(-15./16 + (s-1)*(35./32
  1320                   + (s-1)*(-315./256 + (s-1)*(693./512)))));
  1322           pg[k++] = 3./4 + (s-1)*(-15./8 + (s-1)*(105./32 + (s-1)*(-315./64
  1323                   + (s-1)*(3465./512))));
  1325           pg[k++] = -15./8 + (s-1)*(105./16 + (s-1)*(-945./64
  1326                 + (s-1)*(3465./128)));
  1328           pg[k++] = 105./16 + (s-1)*(-945./32 + (s-1)*(10395./128));
  1330           pg[k++] = -945./32 + (s-1)*(10395./64);
  1332           pg[k++] = 10395./64;
  1335           pg[k++] = 1 + (s-1)*(-1./2 + (s-1)*(3./8 + (s-1)*(-5./16
  1336                   + (s-1)*(35./128 + (s-1)*(-63./256
  1337                       + (s-1)*(231./1024 + (s-1)*(-429./2048)))))));
  1339           pg[k++] = -1./2 + (s-1)*(3./4 + (s-1)*(-15./16 + (s-1)*(35./32
  1340                   + (s-1)*(-315./256 + (s-1)*(693./512
  1341                       + (s-1)*(-3003./2048))))));
  1343           pg[k++] = 3./4 + (s-1)*(-15./8 + (s-1)*(105./32 + (s-1)*(-315./64
  1344                   + (s-1)*(3465./512 + (s-1)*(-9009./1024)))));
  1346           pg[k++] = -15./8 + (s-1)*(105./16 + (s-1)*(-945./64 + (s-1)*(3465./128
  1347                   + (s-1)*(-45045./1024))));
  1349           pg[k++] = 105./16 + (s-1)*(-945./32 + (s-1)*(10395./128
  1350                 + (s-1)*(-45045./256)));
  1352           pg[k++] = -945./32 + (s-1)*(10395./64 + (s-1)*(-135135./256));
  1354           pg[k++] = 10395./64 + (s-1)*(-135135./128);
  1356           pg[k++] = -135135./128;
  1359           pg[k++] = 1 + (s-1)*(-1./2 + (s-1)*(3./8 + (s-1)*(-5./16
  1360                   + (s-1)*(35./128 + (s-1)*(-63./256
  1361                       + (s-1)*(231./1024 + (s-1)*(-429./2048
  1362                           + (s-1)*(6435./32768))))))));
  1364           pg[k++] = -1./2 + (s-1)*(3./4 + (s-1)*(-15./16 + (s-1)*(35./32
  1365                   + (s-1)*(-315./256 + (s-1)*(693./512
  1366                       + (s-1)*(-3003./2048 + (s-1)*(6435./4096)))))));
  1368           pg[k++] = 3./4 + (s-1)*(-15./8 + (s-1)*(105./32 + (s-1)*(-315./64
  1369                   + (s-1)*(3465./512 + (s-1)*(-9009./1024
  1370                       + (s-1)*(45045./4096))))));
  1372           pg[k++] = -15./8 + (s-1)*(105./16 + (s-1)*(-945./64 + (s-1)*(3465./128
  1373                   + (s-1)*(-45045./1024 + (s-1)*(135135./2048)))));
  1375           pg[k++] = 105./16 + (s-1)*(-945./32 + (s-1)*(10395./128
  1376                 + (s-1)*(-45045./256 + (s-1)*(675675./2048))));
  1378           pg[k++] = -945./32 + (s-1)*(10395./64 + (s-1)*(-135135./256
  1379                 + (s-1)*(675675./512)));
  1381           pg[k++] = 10395./64 + (s-1)*(-135135./128 + (s-1)*(2027025./512));
  1383           pg[k++] = -135135./128 + (s-1)*(2027025./256);
  1385           pg[k++] = 2027025./256;
  1388           NAMD_die(
"Unknown MSM splitting.");
  1406     while (k < n) pg[k++] = 0;
  1413     const BigReal a_2 = a_1 * a_1;
  1435     tmp = _c * p[1] * dx;
  1439     tmp = _c * p[1] * dy;
  1443     tmp = _c * p[1] * dz;
  1450     tmp = _c * p[2] * dx * dy;
  1456     tmp = _c * p[2] * dx * dz;
  1462     tmp = _c * p[2] * dy * dz;
  1468     tmp = _c * (p[2] * dx*dx + p[1] * dd);
  1470     tmp = _c * (p[2] * dy*dy + p[1] * dd);
  1472     tmp = _c * (p[2] * dz*dz + p[1] * dd);
  1476     if (_split == 
TAYLOR2) 
return;
  1479     tmp = _c * p[3] * dx * dy * dz;
  1489     tmp = _c * (p[3] * dx*dx * dy + p[2] * dd * dy);
  1493     tmp = _c * (p[3] * dx*dx * dz + p[2] * dd * dz);
  1497     tmp = _c * (p[3] * dy*dy * dx + p[2] * dd * dx);
  1501     tmp = _c * (p[3] * dy*dy * dz + p[2] * dd * dz);
  1505     tmp = _c * (p[3] * dz*dz * dx + p[2] * dd * dx);
  1509     tmp = _c * (p[3] * dz*dz * dy + p[2] * dd * dy);
  1514     if (_split == 
TAYLOR3) 
return;
  1517     tmp = _c * (p[4] * dx*dx * dy * dz + p[3] * dd * dy * dz);
  1523     tmp = _c * (p[4] * dy*dy * dx * dz + p[3] * dd * dx * dz);
  1529     tmp = _c * (p[4] * dz*dz * dx * dy + p[3] * dd * dx * dy);
  1535     tmp = _c * (p[4] * dx*dx * dy*dy + p[3] * dx*dx * dd
  1536         + p[3] * dd * dy*dy + p[2] * dd * dd);
  1538     tmp = _c * (p[4] * dx*dx * dz*dz + p[3] * dx*dx * dd
  1539         + p[3] * dd * dz*dz + p[2] * dd * dd);
  1541     tmp = _c * (p[4] * dy*dy * dz*dz + p[3] * dy*dy * dd
  1542         + p[3] * dd * dz*dz + p[2] * dd * dd);
  1546     if (_split == 
TAYLOR4) 
return;
  1549     tmp = _c * (p[5] * dx*dx * dy*dy * dz + p[4] * dx*dx * dd * dz
  1550         + p[4] * dd * dy*dy * dz + p[3] * dd * dd * dz);
  1554     tmp = _c * (p[5] * dx*dx * dz*dz * dy + p[4] * dx*dx * dd * dy
  1555         + p[4] * dd * dz*dz * dy + p[3] * dd * dd * dy);
  1559     tmp = _c * (p[5] * dy*dy * dz*dz * dx + p[4] * dy*dy * dd * dx
  1560         + p[4] * dd * dz*dz * dx + p[3] * dd * dd * dx);
  1565     if (_split == 
TAYLOR5) 
return;
  1568     tmp = _c * (p[6] * dx*dx * dy*dy * dz*dz + p[5] * dx*dx * dy*dy * dd
  1569         + p[5] * dx*dx * dd * dz*dz + p[5] * dd * dy*dy * dz*dz
  1570         + p[4] * dx*dx * dd * dd + p[4] * dd * dy*dy * dd
  1571         + p[4] * dd * dd * dz*dz + p[3] * dd * dd * dd);
  1586   3, 5, 5, 7, 7, 9, 9, 1,
  1591   5, 7, 7, 9, 9, 11, 11, 3,
  1602   {-5, -3, -1, 0, 1, 3, 5},
  1605   {-5, -3, -1, 0, 1, 3, 5},
  1608   {-7, -5, -3, -1, 0, 1, 3, 5, 7},
  1611   {-7, -5, -3, -1, 0, 1, 3, 5, 7},
  1614   {-9, -7, -5, -3, -1, 0, 1, 3, 5, 7, 9},
  1617   {-9, -7, -5, -3, -1, 0, 1, 3, 5, 7, 9},
  1628   {-1.f/16, 9.f/16, 1, 9.f/16, -1.f/16},
  1631   {3.f/256, -25.f/256, 75.f/128, 1, 75.f/128, -25.f/256, 3.f/256},
  1634   {3.f/256, -25.f/256, 75.f/128, 1, 75.f/128, -25.f/256, 3.f/256},
  1637   { -5.f/2048, 49.f/2048, -245.f/2048, 1225.f/2048, 1, 1225.f/2048,
  1638     -245.f/2048, 49.f/2048, -5.f/2048 },
  1641   { -5.f/2048, 49.f/2048, -245.f/2048, 1225.f/2048, 1, 1225.f/2048,
  1642     -245.f/2048, 49.f/2048, -5.f/2048 },
  1645   { 35.f/65536, -405.f/65536, 567.f/16384, -2205.f/16384, 
  1646     19845.f/32768, 1, 19845.f/32768, -2205.f/16384, 567.f/16384, 
  1647     -405.f/65536, 35.f/65536 },
  1650   { 35.f/65536, -405.f/65536, 567.f/16384, -2205.f/16384, 
  1651     19845.f/32768, 1, 19845.f/32768, -2205.f/16384, 567.f/16384, 
  1652     -405.f/65536, 35.f/65536 },
  1665       mgrLocal = CProxy_ComputeMsmMgr::ckLocalBranch(
  1666           CkpvAccess(BOCclass_group).computeMsmMgr);
  1667 #ifdef MSM_NODE_MAPPING  1679       int *pn = (
int *)idx.data();
  1680 #ifdef MSM_NODE_MAPPING  1697       ComputeMsmMgr *mgrLocal = CProxy_ComputeMsmMgr::ckLocalBranch(
  1698           CkpvAccess(BOCclass_group).computeMsmMgr);
  1699 #ifdef MSM_NODE_MAPPING  1710       int n = *((
int *)idx.data());
  1711 #ifdef MSM_NODE_MAPPING  1760     void init(
int natoms);
  1789 template <
class Vtype, 
class Mtype>
  1810       mgrLocal = CProxy_ComputeMsmMgr::ckLocalBranch(
  1811           CkpvAccess(BOCclass_group).computeMsmMgr);
  1817 #ifdef MSM_PROFILING  1822 #ifdef MSM_MIGRATION  1823     void pup(PUP::er& p) {
  1827 #ifdef MSM_PROFILING  1836 #endif // MSM_MIGRATION  1906       double startTime, stopTime;
  1907       startTime = CkWallTimer();
  1917       stopTime = CkWallTimer();
  1927       startTime = stopTime;
  1933       int gia = 
pgc->
ia();
  1934       int gib = 
pgc->
ib();
  1935       int gja = 
pgc->
ja();
  1936       int gjb = 
pgc->
jb();
  1937       int gka = 
pgc->
ka();
  1938       int gkb = 
pgc->
kb();
  1939       int gni = 
pgc->
ni();
  1940       int gnj = 
pgc->
nj();
  1967 #ifndef MSM_COMM_ONLY  1969       for (
int k = ka;  k <= kb;  k++) {
  1971         int mka = ( qka >= gka + k ? qka : gka + k );
  1972         int mkb = ( qkb <= gkb + k ? qkb : gkb + k );
  1974         for (
int j = ja;  j <= jb;  j++) {
  1976           int mja = ( qja >= gja + j ? qja : gja + j );
  1977           int mjb = ( qjb <= gjb + j ? qjb : gjb + j );
  1979           for (
int i = ia;  i <= ib;  i++) {
  1981             int mia = ( qia >= gia + i ? qia : gia + i );
  1982             int mib = ( qib <= gib + i ? qib : gib + i );
  1989             for (
int qk = mka;  qk <= mkb;  qk++) {
  1990               int qkoff = (qk - qka) * qnj;
  1991               int gkoff = ((qk-k) - gka) * gnj;
  1993               for (
int qj = mja;  qj <= mjb;  qj++) {
  1994                 int qjkoff = (qkoff + qj - qja) * qni;
  1995                 int gjkoff = (gkoff + (qj-j) - gja) * gni;
  1998 #if defined(__INTEL_COMPILER)  1999 #pragma vector always   2001                 for (
int qi = mia;  qi <= mib;  qi++) {
  2002                   int qijkoff = qjkoff + qi - qia;
  2003                   int gijkoff = gjkoff + (qi-i) - gia;
  2005                   ehsum += gcbuffer[gijkoff] * qhbuffer[qijkoff];
  2013             int nn = mib - mia + 1;
  2014             for (
int qk = mka;  qk <= mkb;  qk++) {
  2015               int qkoff = (qk - qka) * qnj;
  2016               int gkoff = ((qk-k) - gka) * gnj;
  2018               for (
int qj = mja;  qj <= mjb;  qj++) {
  2019                 int qjkoff = (qkoff + qj - qja) * qni;
  2020                 int gjkoff = (gkoff + (qj-j) - gja) * gni;
  2022                 const Float *qbuf = qhbuffer + (qjkoff - qia + mia);
  2023                 const Float *gbuf = gcbuffer + (gjkoff - i - gia + mia);
  2024 #ifdef MSM_PROFILING  2028 #if defined(__INTEL_COMPILER)  2029 #pragma vector always   2031                 for (
int ii = 0;  ii < nn;  ii++) {
  2032                   ehsum += gbuf[ii] * qbuf[ii];
  2038             int nn = mib - mia + 1;
  2040               int qnji = qnj * qni;
  2041               int qkoff = -qka*qnji - qja*qni - qia + mia;
  2042               int gnji = gnj * gni;
  2043               int gkoff = (-k-gka)*gnji + (-j-gja)*gni - i - gia + mia;
  2045               for (
int qk = mka;  qk <= mkb;  qk++) {
  2046                 int qjkoff = qkoff + qk*qnji;
  2047                 int gjkoff = gkoff + qk*gnji;
  2049                 for (
int qj = mja;  qj <= mjb;  qj++) {
  2050                   const Vtype *qbuf = qhbuffer + (qjkoff + qj*qni);
  2051                   const Mtype *gbuf = gcbuffer + (gjkoff + qj*gni);
  2054 #ifdef MSM_PROFILING  2058 #if defined(__INTEL_COMPILER)  2059 #pragma vector always   2061                   for (
int ii = 0;  ii < 8;  ii++) {
  2062                     ehsum += gbuf[ii] * qbuf[ii];
  2069               int qnji = qnj * qni;
  2070               int qkoff = -qka*qnji - qja*qni - qia + mia;
  2071               int gnji = gnj * gni;
  2072               int gkoff = (-k-gka)*gnji + (-j-gja)*gni - i - gia + mia;
  2074               for (
int qk = mka;  qk <= mkb;  qk++) {
  2075                 int qjkoff = qkoff + qk*qnji;
  2076                 int gjkoff = gkoff + qk*gnji;
  2078                 for (
int qj = mja;  qj <= mjb;  qj++) {
  2079                   const Vtype *qbuf = qhbuffer + (qjkoff + qj*qni);
  2080                   const Mtype *gbuf = gcbuffer + (gjkoff + qj*gni);
  2083 #ifdef MSM_PROFILING  2087 #if defined(__INTEL_COMPILER)  2088 #pragma vector always   2090                   for (
int ii = 0;  ii < nn;  ii++) {
  2091                     ehsum += gbuf[ii] * qbuf[ii];
  2101             ehbuffer[index] = ehsum;
  2106 #endif // !MSM_COMM_ONLY  2108 #ifdef MSM_PROFILING  2116 #ifdef MSM_FOLD_FACTOR  2125 #ifdef DEBUG_MSM_GRID  2126         printf(
"level=%d   ehfold:  [%d..%d] x [%d..%d] x [%d..%d]  "  2128                 "              eh:  [%d..%d] x [%d..%d] x [%d..%d]  "  2130                "         eh lower:  %d %d %d\n",
  2150         for (
int k = ka;  k <= kb;  k++) {
  2152           if      (kk < 
eka)  
do { kk += 
enk; } 
while (kk < 
eka);
  2153           else if (kk > 
ekb)  
do { kk -= 
enk; } 
while (kk > 
ekb);
  2154           int koff = (kk - 
eka) * 
enj;
  2155           for (
int j = ja;  j <= jb;  j++) {
  2157             if      (jj < 
eja)  
do { jj += 
enj; } 
while (jj < 
eja);
  2158             else if (jj > 
ejb)  
do { jj -= 
enj; } 
while (jj > 
ejb);
  2159             int jkoff = (koff + (jj - 
eja)) * 
eni;
  2160             for (
int i = ia;  i <= ib;  i++, index++) {
  2162               if      (ii < 
eia)  
do { ii += 
eni; } 
while (ii < 
eia);
  2163               else if (ii > 
eib)  
do { ii -= 
eni; } 
while (ii > 
eib);
  2164               int ijkoff = jkoff + (ii - 
eia);
  2165               ehbuf[ijkoff] += ehfoldbuf[index];
  2174 #else    // !MSM_FOLD_FACTOR  2177 #endif   // MSM_FOLD_FACTOR  2180       stopTime = CkWallTimer();
  2194   public CBase_MsmGridCutoff,
  2200 #ifdef MSM_REDUCE_GRID  2202 #endif // MSM_REDUCE_GRID  2207 #if  ! defined(MSM_MIGRATION)  2209 #else // MSM_MIGRATION  2210       : CBase_MsmGridCutoff(m) {
  2211 #ifdef DEBUG_MSM_MIGRATE  2212       printf(
"MsmGridCutoff element %d migrated to processor %d\n",
  2213           thisIndex, CkMyPe());
  2223     virtual void pup(PUP::er& p) {
  2224 #ifdef DEBUG_MSM_MIGRATE  2225       printf(
"MsmGridCutoff element %d pupped on processor %d\n",
  2226           thisIndex, CkMyPe());
  2228       CBase_MsmGridCutoff::pup(p);  
  2231 #endif // MSM_MIGRATION  2245 #ifdef MSM_REDUCE_GRID  2252 #endif // MSM_REDUCE_GRID  2253 #ifdef DEBUG_MSM_GRID  2254       printf(
"MsmGridCutoff[%d]:  setup()"  2255           " send to level=%d block=(%d,%d,%d)\n",
  2264 #ifdef DEBUG_MSM_GRID  2265       CkPrintf(
"MSM GRID CUTOFF %d setup section on PE %d\n",
  2266           thisIndex, CkMyPe());
  2268       CkGetSectionInfo(
cookie, msg);  
  2274 #ifdef DEBUG_MSM_GRID  2275       printf(
"MsmGridCutoff %d:  compute()\n", thisIndex);
  2281       double startTime, stopTime;
  2282       startTime = CkWallTimer();
  2284 #ifdef MSM_REDUCE_GRID  2287       CProxy_CkMulticastMgr mcastProxy =
  2288         CkpvAccess(BOCclass_group).multicastMgr;
  2289       CkMulticastMgr *mcastPtr =
  2290         CProxy_CkMulticastMgr(mcastProxy).ckLocalBranch();
  2291       CkCallback cb(CkIndex_MsmBlock::sumReducedPotential(NULL),
  2296       mcastPtr->contribute(
  2298           CkReduction::sum_float, 
cookie, cb);
  2309           bindex.
n.
i, bindex.
n.
j, bindex.
n.
k).addPotential(gm);
  2311 #endif // MSM_REDUCE_GRID  2314       stopTime = CkWallTimer();
  2328   public CBase_MsmC1HermiteGridCutoff,
  2334 #ifdef MSM_REDUCE_GRID  2336 #endif // MSM_REDUCE_GRID  2341 #if  ! defined(MSM_MIGRATION)  2343 #else // MSM_MIGRATION  2344       : CBase_MsmC1HermiteGridCutoff(m) {
  2345 #ifdef DEBUG_MSM_MIGRATE  2346       printf(
"MsmC1HermiteGridCutoff element %d migrated to processor %d\n",
  2347           thisIndex, CkMyPe());
  2357     virtual void pup(PUP::er& p) {
  2358 #ifdef DEBUG_MSM_MIGRATE  2359       printf(
"MsmC1HermiteGridCutoff element %d pupped on processor %d\n",
  2360           thisIndex, CkMyPe());
  2362       CBase_MsmC1HermiteGridCutoff::pup(p);  
  2365 #endif // MSM_MIGRATION  2379 #ifdef DEBUG_MSM_GRID  2380       printf(
"MsmC1HermiteGridCutoff[%d]:  setup()"  2381           " send to level=%d block=(%d,%d,%d)\n",
  2387 #ifdef MSM_REDUCE_GRID  2394 #endif // MSM_REDUCE_GRID  2398 #ifdef DEBUG_MSM_GRID  2399       CkPrintf(
"MSM C1 HERMITE GRID CUTOFF %d setup section on PE %d\n",
  2400           thisIndex, CkMyPe());
  2402       CkGetSectionInfo(
cookie, msg);  
  2408 #ifdef DEBUG_MSM_GRID  2409       printf(
"MsmC1HermiteGridCutoff %d:  compute()\n", thisIndex);
  2419       double startTime, stopTime;
  2420       startTime = CkWallTimer();
  2422 #ifdef MSM_REDUCE_GRID  2425       CProxy_CkMulticastMgr mcastProxy =
  2426         CkpvAccess(BOCclass_group).multicastMgr;
  2427       CkMulticastMgr *mcastPtr =
  2428         CProxy_CkMulticastMgr(mcastProxy).ckLocalBranch();
  2429       CkCallback cb(CkIndex_MsmC1HermiteBlock::sumReducedPotential(NULL),
  2434       mcastPtr->contribute(
  2436           CkReduction::sum_float, 
cookie, cb);
  2447           bindex.
n.
i, bindex.
n.
j, bindex.
n.
k).addPotential(gm);
  2449 #endif // MSM_REDUCE_GRID  2452       stopTime = CkWallTimer();
  2465       double startTime, stopTime;
  2466       startTime = CkWallTimer();
  2476       stopTime = CkWallTimer();
  2486       startTime = stopTime;
  2492       int gia = 
pgc->
ia();
  2493       int gib = 
pgc->
ib();
  2494       int gja = 
pgc->
ja();
  2495       int gjb = 
pgc->
jb();
  2496       int gka = 
pgc->
ka();
  2497       int gkb = 
pgc->
kb();
  2498       int gni = 
pgc->
ni();
  2499       int gnj = 
pgc->
nj();
  2523 #ifdef DEBUG_MEMORY_ALIGNMENT  2524       printf(
"gcbuffer mem:  addr=%p  div32=%lu  mod32=%lu\n",
  2526           (
unsigned long)(gcbuffer)/32,
  2527           (
unsigned long)(gcbuffer)%32);
  2528       printf(
"qhbuffer mem:  addr=%p  div32=%lu  mod32=%lu\n",
  2530           (
unsigned long)(qhbuffer)/32,
  2531           (
unsigned long)(qhbuffer)%32);
  2532       printf(
"ehbuffer mem:  addr=%p  div32=%lu  mod32=%lu\n",
  2534           (
unsigned long)(ehbuffer)/32,
  2535           (
unsigned long)(ehbuffer)%32);
  2538 #ifndef MSM_COMM_ONLY  2540       for (
int k = ka;  k <= kb;  k++) {
  2542         int mka = ( qka >= gka + k ? qka : gka + k );
  2543         int mkb = ( qkb <= gkb + k ? qkb : gkb + k );
  2545         for (
int j = ja;  j <= jb;  j++) {
  2547           int mja = ( qja >= gja + j ? qja : gja + j );
  2548           int mjb = ( qjb <= gjb + j ? qjb : gjb + j );
  2550           for (
int i = ia;  i <= ib;  i++) {
  2552             int mia = ( qia >= gia + i ? qia : gia + i );
  2553             int mib = ( qib <= gib + i ? qib : gib + i );
  2559             int nn = mib - mia + 1;
  2562               int qnji = qnj * qni;
  2563               int qkoff = -qka*qnji - qja*qni - qia + mia;
  2564               int gnji = gnj * gni;
  2565               int gkoff = (-k-gka)*gnji + (-j-gja)*gni - i - gia + mia;
  2567               for (
int qk = mka;  qk <= mkb;  qk++) {
  2568                 int qjkoff = qkoff + qk*qnji;
  2569                 int gjkoff = gkoff + qk*gnji;
  2571                 for (
int qj = mja;  qj <= mjb;  qj++) {
  2572                   const C1Vector *qbuf = qhbuffer + (qjkoff + qj*qni);
  2573                   const C1Matrix *gbuf = gcbuffer + (gjkoff + qj*gni);
  2574 #ifdef MSM_PROFILING  2578 #if defined(__INTEL_COMPILER)  2579 #pragma vector always   2581                   for (
int ii = 0;  ii < nn;  ii++) {
  2584                     ehsum += gbuf[ii] * qbuf[ii];
  2588                     if ( *((
int *)(gbuf)) != 0) {
  2591 #if defined(__INTEL_COMPILER)  2592 #pragma vector always  2596                           ehsum.
velem[jm] += gbuf->melem[km] * qbuf->velem[im];
  2609             ehbuffer[index] = ehsum;
  2614 #endif // !MSM_COMM_ONLY  2616 #ifdef MSM_PROFILING  2624 #ifdef MSM_FOLD_FACTOR  2633 #ifdef DEBUG_MSM_GRID  2634         printf(
"level=%d   ehfold:  [%d..%d] x [%d..%d] x [%d..%d]  "  2636                 "              eh:  [%d..%d] x [%d..%d] x [%d..%d]  "  2638                "         eh lower:  %d %d %d\n",
  2658         for (
int k = ka;  k <= kb;  k++) {
  2660           if      (kk < 
eka)  
do { kk += 
enk; } 
while (kk < 
eka);
  2661           else if (kk > 
ekb)  
do { kk -= 
enk; } 
while (kk > 
ekb);
  2662           int koff = (kk - 
eka) * 
enj;
  2663           for (
int j = ja;  j <= jb;  j++) {
  2665             if      (jj < 
eja)  
do { jj += 
enj; } 
while (jj < 
eja);
  2666             else if (jj > 
ejb)  
do { jj -= 
enj; } 
while (jj > 
ejb);
  2667             int jkoff = (koff + (jj - 
eja)) * 
eni;
  2668             for (
int i = ia;  i <= ib;  i++, index++) {
  2670               if      (ii < 
eia)  
do { ii += 
eni; } 
while (ii < 
eia);
  2671               else if (ii > 
eib)  
do { ii -= 
eni; } 
while (ii > 
eib);
  2672               int ijkoff = jkoff + (ii - 
eia);
  2673               ehbuf[ijkoff] += ehfoldbuf[index];
  2682 #else    // !MSM_FOLD_FACTOR  2685 #endif   // MSM_FOLD_FACTOR  2688       stopTime = CkWallTimer();
  2732 template <
class Vtype, 
class Mtype>
  2741 #ifndef MSM_GRID_CUTOFF_DECOMP  2762 #ifndef MSM_GRID_CUTOFF_DECOMP  2785 #ifndef MSM_GRID_CUTOFF_DECOMP  2786     void gridCutoffKernel();
  2792 template <
class Vtype, 
class Mtype>
  2794   blockIndex = bindex;
  2795   mgrProxy = CProxy_ComputeMsmMgr(CkpvAccess(BOCclass_group).computeMsmMgr);
  2796   mgrLocal = CProxy_ComputeMsmMgr::ckLocalBranch(
  2797       CkpvAccess(BOCclass_group).computeMsmMgr);
  2799   bd = &(map->
blockLevel[blockIndex.level](blockIndex.n));
  2800   qh.
init( bd->nrange );
  2801   eh.
init( bd->nrange );
  2802 #ifndef MSM_GRID_CUTOFF_DECOMP  2803   ehCutoff.init( bd->nrangeCutoff );
  2805   qhRestricted.init( bd->nrangeRestricted );
  2806   ehProlongated.init( bd->nrangeProlongated );
  2807 #ifdef DEBUG_MSM_GRID  2808   printf(
"MsmBlockKernel level=%d, n=%d %d %d:  constructor\n",
  2809       blockIndex.level, blockIndex.n.i, blockIndex.n.j, blockIndex.n.k);
  2812   mgrLocal->addTiming();
  2818 template <
class Vtype, 
class Mtype>
  2822 #ifndef MSM_GRID_CUTOFF_DECOMP  2825   qhRestricted.reset(0);
  2826   ehProlongated.reset(0);
  2828   cntRecvsPotential = 0;
  2832 template <
class Vtype, 
class Mtype>
  2835 #ifdef DEBUG_MSM_GRID  2836   printf(
"MsmBlockKernel level=%d, id=%d %d %d:  restriction\n",
  2837       blockIndex.level, blockIndex.n.i, blockIndex.n.j, blockIndex.n.k);
  2841   double startTime, stopTime;
  2842   startTime = CkWallTimer();
  2845 #ifndef MSM_COMM_ONLY  2847   const int approx = mgrLocal->
approx;
  2861   int ia2 = qhRestricted.ia();
  2862   int ib2 = qhRestricted.ib();
  2863   int ja2 = qhRestricted.ja();
  2864   int jb2 = qhRestricted.jb();
  2865   int ka2 = qhRestricted.ka();
  2866   int kb2 = qhRestricted.kb();
  2869   qhRestricted.reset(0);
  2872   for (
int k2 = ka2;  k2 <= kb2;  k2++) {
  2874     for (
int j2 = ja2;  j2 <= jb2;  j2++) {
  2876       for (
int i2 = ia2;  i2 <= ib2;  i2++) {
  2880         Vtype& q2hsum = qhRestricted(i2,j2,k2);
  2882         for (
int k = 0;  k < nstencil;  k++) {
  2883           int kn = k1 + offset[k];
  2884           if      (kn < ka1) 
continue;
  2885           else if (kn > kb1) 
break;
  2887           for (
int j = 0;  j < nstencil;  j++) {
  2888             int jn = j1 + offset[j];
  2889             if      (jn < ja1) 
continue;
  2890             else if (jn > jb1) 
break;
  2892             for (
int i = 0;  i < nstencil;  i++) {
  2893               int in = i1 + offset[i];
  2894               if      (in < ia1) 
continue;
  2895               else if (in > ib1) 
break;
  2897               q2hsum += res(i,j,k) * qh(in,jn,kn);
  2906   qhRestricted.reset(0);
  2907 #endif // !MSM_COMM_ONLY  2910   stopTime = CkWallTimer();
  2916 #ifndef MSM_GRID_CUTOFF_DECOMP  2917 template <
class Vtype, 
class Mtype>
  2920 #ifdef DEBUG_MSM_GRID  2921   printf(
"MsmBlockKernel level=%d, id=%d %d %d:  grid cutoff\n",
  2922       blockIndex.level, blockIndex.n.i, blockIndex.n.j, blockIndex.n.k);
  2925   double startTime, stopTime;
  2926   startTime = CkWallTimer();
  2928 #ifndef MSM_COMM_ONLY  2946   int ia = ehCutoff.ia();
  2947   int ib = ehCutoff.ib();
  2948   int ja = ehCutoff.ja();
  2949   int jb = ehCutoff.jb();
  2950   int ka = ehCutoff.ka();
  2951   int kb = ehCutoff.kb();
  2955   for (
int k = ka;  k <= kb;  k++) {
  2956     for (
int j = ja;  j <= jb;  j++) {
  2957       for (
int i = ia;  i <= ib;  i++) {
  2959         int mia = ( qia >= gia + i ? qia : gia + i );
  2960         int mib = ( qib <= gib + i ? qib : gib + i );
  2961         int mja = ( qja >= gja + j ? qja : gja + j );
  2962         int mjb = ( qjb <= gjb + j ? qjb : gjb + j );
  2963         int mka = ( qka >= gka + k ? qka : gka + k );
  2964         int mkb = ( qkb <= gkb + k ? qkb : gkb + k );
  2966         Vtype& ehsum = ehCutoff(i,j,k);
  2968         for (
int qk = mka;  qk <= mkb;  qk++) {
  2969           for (
int qj = mja;  qj <= mjb;  qj++) {
  2970             for (
int qi = mia;  qi <= mib;  qi++) {
  2971               ehsum += gc(qi-i, qj-j, qk-k) * qh(qi,qj,qk);
  2981 #endif // !MSM_COMM_ONLY  2983   stopTime = CkWallTimer();
  2987 #endif // MSM_GRID_CUTOFF_DECOMP  2990 template <
class Vtype, 
class Mtype>
  2993 #ifdef DEBUG_MSM_GRID  2994   printf(
"MsmBlockKernel level=%d, id=%d %d %d:  prolongation\n",
  2995       blockIndex.level, blockIndex.n.i, blockIndex.n.j, blockIndex.n.k);
  2999   double startTime, stopTime;
  3000   startTime = CkWallTimer();
  3002 #ifndef MSM_COMM_ONLY  3004   const int approx = mgrLocal->
approx;
  3010   int ia1 = ehProlongated.
ia();
  3011   int ib1 = ehProlongated.ib();
  3012   int ja1 = ehProlongated.ja();
  3013   int jb1 = ehProlongated.jb();
  3014   int ka1 = ehProlongated.ka();
  3015   int kb1 = ehProlongated.kb();
  3026   for (
int k2 = ka2;  k2 <= kb2;  k2++) {
  3028     for (
int j2 = ja2;  j2 <= jb2;  j2++) {
  3030       for (
int i2 = ia2;  i2 <= ib2;  i2++) {
  3034         for (
int k = 0;  k < nstencil;  k++) {
  3035           int kn = k1 + offset[k];
  3036           if      (kn < ka1) 
continue;
  3037           else if (kn > kb1) 
break;
  3039           for (
int j = 0;  j < nstencil;  j++) {
  3040             int jn = j1 + offset[j];
  3041             if      (jn < ja1) 
continue;
  3042             else if (jn > jb1) 
break;
  3044             for (
int i = 0;  i < nstencil;  i++) {
  3045               int in = i1 + offset[i];
  3046               if      (in < ia1) 
continue;
  3047               else if (in > ib1) 
break;
  3049               ehProlongated(in,jn,kn) += pro(i,j,k) * eh(i2,j2,k2);
  3058   ehProlongated.reset(0);
  3059 #endif // !MSM_COMM_ONLY  3061   stopTime = CkWallTimer();
  3072   public CBase_MsmBlock,
  3081           msm::BlockIndex(level,
  3082             msm::Ivec(thisIndex.x, thisIndex.y, thisIndex.z))
  3085 #ifndef MSM_GRID_CUTOFF_DECOMP  3097       double startTime, stopTime;
  3098       startTime = CkWallTimer();
  3102       memcpy(ehfull.
data().
buffer(), msg->getData(), msg->getSize());
  3111       stopTime = CkWallTimer();
  3125 #ifndef MSM_GRID_CUTOFF_DECOMP  3126     void sendAcrossPotential();
  3142 #ifdef DEBUG_MSM_GRID  3143   CkPrintf(
"LEVEL %d MSM BLOCK (%d,%d,%d):  "  3144       "creating broadcast section on PE %d\n",
  3147   std::vector<CkArrayIndex1D> elems;
  3155   CProxy_CkMulticastMgr mcastProxy = CkpvAccess(BOCclass_group).multicastMgr;
  3156   CkMulticastMgr *mcastPtr = CProxy_CkMulticastMgr(mcastProxy).ckLocalBranch();
  3159 #ifdef DEBUG_MSM_GRID  3161   sprintf(s, 
"LEVEL %d MSM BLOCK (%d,%d,%d):  "  3162       "creating reduction section on PE %d\n",
  3165   std::vector<CkArrayIndex1D> elems2;
  3167 #ifdef DEBUG_MSM_GRID  3168   strcat(s, 
"receiving from MsmGridCutoff ID:");
  3171 #ifdef DEBUG_MSM_GRID  3178 #ifdef DEBUG_MSM_GRID  3187   CProxyElement_MsmBlock thisElementProxy = thisProxy(thisIndex);
  3188   msg->
put(&thisElementProxy);
  3205   double startTime, stopTime;
  3206   startTime = CkWallTimer();
  3213   stopTime = CkWallTimer();
  3229   double startTime, stopTime;
  3230   startTime = CkWallTimer();
  3234   for (
int n = 0;  n < 
bd->
sendUp.len();  n++) {
  3255   stopTime = CkWallTimer();
  3263 #ifdef DEBUG_MSM_GRID  3264   printf(
"MsmBlock level=%d, id=%d %d %d:  grid cutoff\n",
  3267 #ifndef MSM_GRID_CUTOFF_DECOMP  3269   sendAcrossPotential();
  3270 #else // MSM_GRID_CUTOFF_DECOMP  3274   double startTime, stopTime;
  3275   startTime = CkWallTimer();
  3283   for (
int n = 0;  n < len;  n++) {
  3285     startTime = CkWallTimer();
  3292     stopTime = CkWallTimer();
  3305   stopTime = CkWallTimer();
  3311 #endif // MSM_GRID_CUTOFF_DECOMP  3316 #ifndef MSM_GRID_CUTOFF_DECOMP  3317 void MsmBlock::sendAcrossPotential()
  3320   double startTime, stopTime;
  3321   startTime = CkWallTimer();
  3346   stopTime = CkWallTimer();
  3356   double startTime, stopTime;
  3357   startTime = CkWallTimer();
  3365   stopTime = CkWallTimer();
  3382   double startTime, stopTime;
  3383   startTime = CkWallTimer();
  3388   for (
int n = 0;  n < 
bd->
sendDown.len();  n++) {
  3408   stopTime = CkWallTimer();
  3419   double startTime, stopTime;
  3420   startTime = CkWallTimer();
  3442     int pe = pm->
node(pid);
  3446   stopTime = CkWallTimer();
  3459   public CBase_MsmC1HermiteBlock,
  3468           msm::BlockIndex(level,
  3469             msm::Ivec(thisIndex.x, thisIndex.y, thisIndex.z))
  3472       int isfirstlevel = (level == 0);
  3478 #ifndef MSM_GRID_CUTOFF_DECOMP  3492       double startTime, stopTime;
  3493       startTime = CkWallTimer();
  3497       memcpy(ehfull.
data().
buffer(), msg->getData(), msg->getSize());
  3506       stopTime = CkWallTimer();
  3520 #ifndef MSM_GRID_CUTOFF_DECOMP  3521     void sendAcrossPotential();
  3537 #ifdef DEBUG_MSM_GRID  3538   CkPrintf(
"LEVEL %d MSM C1 HERMITE BLOCK (%d,%d,%d):  "  3539       "creating broadcast section on PE %d\n",
  3542   std::vector<CkArrayIndex1D> elems;
  3550   CProxy_CkMulticastMgr mcastProxy = CkpvAccess(BOCclass_group).multicastMgr;
  3551   CkMulticastMgr *mcastPtr = CProxy_CkMulticastMgr(mcastProxy).ckLocalBranch();
  3554 #ifdef DEBUG_MSM_GRID  3556   sprintf(s, 
"LEVEL %d MSM C1 HERMITE BLOCK (%d,%d,%d):  "  3557       "creating reduction section on PE %d\n",
  3560   std::vector<CkArrayIndex1D> elems2;
  3562 #ifdef DEBUG_MSM_GRID  3563   strcat(s, 
"receiving from MsmC1HermiteGridCutoff ID:");
  3566 #ifdef DEBUG_MSM_GRID  3573 #ifdef DEBUG_MSM_GRID  3582   CProxyElement_MsmC1HermiteBlock thisElementProxy = thisProxy(thisIndex);
  3583   msg->
put(&thisElementProxy);
  3600   double startTime, stopTime;
  3601   startTime = CkWallTimer();
  3608   stopTime = CkWallTimer();
  3624   double startTime, stopTime;
  3625   startTime = CkWallTimer();
  3629   for (
int n = 0;  n < 
bd->
sendUp.len();  n++) {
  3650   stopTime = CkWallTimer();
  3658 #ifdef DEBUG_MSM_GRID  3659   printf(
"MsmC1HermiteBlock level=%d, id=%d %d %d:  grid cutoff\n",
  3662 #ifndef MSM_GRID_CUTOFF_DECOMP  3664   sendAcrossPotential();
  3665 #else // MSM_GRID_CUTOFF_DECOMP  3669   double startTime, stopTime;
  3670   startTime = CkWallTimer();
  3678   for (
int n = 0;  n < len;  n++) {
  3680     startTime = CkWallTimer();
  3687     stopTime = CkWallTimer();
  3700   stopTime = CkWallTimer();
  3706 #endif // MSM_GRID_CUTOFF_DECOMP  3711 #ifndef MSM_GRID_CUTOFF_DECOMP  3712 void MsmC1HermiteBlock::sendAcrossPotential()
  3715   double startTime, stopTime;
  3716   startTime = CkWallTimer();
  3741   stopTime = CkWallTimer();
  3751   double startTime, stopTime;
  3752   startTime = CkWallTimer();
  3760   stopTime = CkWallTimer();
  3777   double startTime, stopTime;
  3778   startTime = CkWallTimer();
  3783   for (
int n = 0;  n < 
bd->
sendDown.len();  n++) {
  3803   stopTime = CkWallTimer();
  3814   double startTime, stopTime;
  3815   startTime = CkWallTimer();
  3837     int pe = pm->
node(pid);
  3841   stopTime = CkWallTimer();
  3855   msmProxy(thisgroup), msmCompute(0)
  3857 #ifdef DEBUG_MSM_VERBOSE  3858   printf(
"ComputeMsmMgr:  (constructor) PE %d\n", CkMyPe());
  3860   CkpvAccess(BOCclass_group).computeMsmMgr = thisgroup;
  3863   if (CkMyPe() == 0) {
  3864     msmTimer = CProxy_MsmTimer::ckNew();
  3868 #ifdef MSM_PROFILING  3869   if (CkMyPe() == 0) {
  3870     msmProfiler = CProxy_MsmProfiler::ckNew();
  3878 #ifdef DEBUG_MSM_VERBOSE  3879   printf(
"ComputeMsmMgr:  (destructor) PE %d\n", CkMyPe());
  3894     int& ia, 
int& ib, 
int isperiodic)
  3899     const BigReal hmax = 1.5 * hmin;
  3902     while (hh >= hmax) {
  3908         NAMD_die(
"Basis vector is too short or MSM grid spacing is too large");
  3923     nn = (int) floor(len/hh + 0.5);
  3934   if (n % bsize != 0) {
  3937     if (n % 3 == 0) newbsize = 3;
  3938     while (newbsize < bsize && newbsize < n) newbsize *= 2;
  3939     if (bsize < newbsize) newbsize /= 2;
  3940     if (n % newbsize != 0) {
  3941       NAMD_die(
"MSM grid size for periodic dimensions must be "  3942           "a power of 2 times at most one power of 3");
  3971 #ifdef DEBUG_MSM_VERBOSE  3972   printf(
"ComputeMsmMgr:  initialize() PE %d\n", CkMyPe());
  3980   printf(
"PE%d: initializing MSM\n", CkMyPe());
  3994   if (sdmin.
x > sdmax.
x) { 
double t=sdmin.
x; sdmin.
x=sdmax.
x; sdmax.
x=t; }
  3995   if (sdmin.
y > sdmax.
y) { 
double t=sdmin.
y; sdmin.
y=sdmax.
y; sdmax.
y=t; }
  3996   if (sdmin.
z > sdmax.
z) { 
double t=sdmin.
z; sdmin.
z=sdmax.
z; sdmax.
z=t; }
  3998   if ( ! 
lattice.
a_p() && (sdmin.
x != 0 || sdmax.
x != 0)) {
  4001       if (CkMyPe() == 0) {
  4002         iout << 
iINFO << 
"MSM extending minimum X to "  4008       if (CkMyPe() == 0) {
  4009         iout << 
iINFO << 
"MSM extending maximum X to "  4014   if ( ! 
lattice.
b_p() && (sdmin.
y != 0 || sdmax.
y != 0)) {
  4017       if (CkMyPe() == 0) {
  4018         iout << 
iINFO << 
"MSM extending minimum Y to "  4024       if (CkMyPe() == 0) {
  4025         iout << 
iINFO << 
"MSM extending maximum Y to "  4030   if ( ! 
lattice.
c_p() && (sdmin.
z != 0 || sdmax.
z != 0)) {
  4033       if (CkMyPe() == 0) {
  4034         iout << 
iINFO << 
"MSM extending minimum Z to "  4040       if (CkMyPe() == 0) {
  4041         iout << 
iINFO << 
"MSM extending maximum Z to "  4047 #ifdef DEBUG_MSM_VERBOSE  4048   printf(
"smin = %g %g %g  smax = %g %g %g\n",
  4054     NAMD_die(
"MSM: unknown approximation requested (MSMApprox)");
  4059     NAMD_die(
"MSM: unknown splitting requested (MSMSplit)");
  4062   if (CkMyPe() == 0) {
  4063     const char *approx_str, *split_str;
  4065       case CUBIC:      approx_str = 
"C1 cubic";    
break;
  4066       case QUINTIC:    approx_str = 
"C1 quintic";  
break;
  4067       case QUINTIC2:   approx_str = 
"C2 quintic";  
break;
  4068       case SEPTIC:     approx_str = 
"C1 septic";   
break;
  4069       case SEPTIC3:    approx_str = 
"C3 septic";   
break;
  4070       case NONIC:      approx_str = 
"C1 nonic";    
break;
  4071       case NONIC4:     approx_str = 
"C4 nonic";    
break;
  4072       case C1HERMITE:  approx_str = 
"C1 Hermite";  
break;
  4073       default:         approx_str = 
"unknown";     
break;
  4076       case TAYLOR2:  split_str = 
"C2 Taylor";   
break;
  4077       case TAYLOR3:  split_str = 
"C3 Taylor";   
break;
  4078       case TAYLOR4:  split_str = 
"C4 Taylor";   
break;
  4079       case TAYLOR5:  split_str = 
"C5 Taylor";   
break;
  4080       case TAYLOR6:  split_str = 
"C6 Taylor";   
break;
  4081       case TAYLOR7:  split_str = 
"C7 Taylor";   
break;
  4082       case TAYLOR8:  split_str = 
"C8 Taylor";   
break;
  4083       default:       split_str = 
"unknown";     
break;
  4086                   << approx_str << 
" interpolation\n";
  4088                   << split_str << 
" splitting function\n";
  4102     NAMD_die(
"MSM: grid spacing must be greater than 0 (MSMGridSpacing)");
  4105     NAMD_die(
"MSM: grid spacing must be less than cutoff (MSMGridSpacing)");
  4110     NAMD_die(
"MSM: padding must be non-negative (MSMPadding)");
  4119     NAMD_die(
"MSM: requested splitting for long-range dispersion "  4120         "(not implemented)");
  4127   if (bsx <= 0 || bsy <= 0 || bsz <= 0) {
  4128     NAMD_die(
"MSM: invalid block size requested (MSMBlockSize[XYZ])");
  4130 #ifdef MSM_FIXED_SIZE_GRID_MSG  4132     NAMD_die(
"MSM: requested block size (MSMBlockSize[XYZ]) too big");
  4135   if (CkMyPe() == 0) {
  4136     iout << 
iINFO << 
"MSM block size decomposition along X is "  4137                   << bsx << 
" grid points\n";
  4138     iout << 
iINFO << 
"MSM block size decomposition along Y is "  4139                   << bsy << 
" grid points\n";
  4140     iout << 
iINFO << 
"MSM block size decomposition along Z is "  4141                   << bsz << 
" grid points\n";
  4152   int ispany = (ispx || ispy || ispz);  
  4168     xlen = sgmax.
x - sgmin.
x;
  4170 #ifdef DEBUG_MSM_VERBOSE  4171   printf(
"xlen = %g   sgmin.x = %g   sgmax.x = %g\n", xlen, sgmin.
x, sgmax.
x);
  4188     ylen = sgmax.
y - sgmin.
y;
  4190 #ifdef DEBUG_MSM_VERBOSE  4191   printf(
"ylen = %g   sgmin.y = %g   sgmax.y = %g\n", ylen, sgmin.
y, sgmax.
y);
  4208     zlen = sgmax.
z - sgmin.
z;
  4210 #ifdef DEBUG_MSM_VERBOSE  4211   printf(
"zlen = %g   sgmin.z = %g   sgmax.z = %g\n", zlen, sgmin.
z, sgmax.
z);
  4215   int ia, ib, ja, jb, ka, kb;
  4222   if (CkMyPe() == 0) {
  4223     if (ispx || ispy || ispz) {
  4224       iout << 
iINFO << 
"MSM grid spacing along X is "<< 
hxlen << 
" A\n";
  4225       iout << 
iINFO << 
"MSM grid spacing along Y is "<< 
hylen << 
" A\n";
  4226       iout << 
iINFO << 
"MSM grid spacing along Z is "<< 
hzlen << 
" A\n";
  4231     if ( ! ispx || ! ispy || ! ispz ) {
  4236   int ni = ib - ia + 1;
  4237   int nj = jb - ja + 1;
  4238   int nk = kb - ka + 1;
  4250   int smallestnbox = 1;
  4254 #ifdef DEBUG_MSM_VERBOSE  4255   printf(
"maxlevels = %d\n", maxlevels);
  4261     for (maxlevels = 1;  n > 0;  n >>= 1)  maxlevels++;
  4264       int ngci = (int) ceil(3*
a / 
hxlen) - 1;
  4265       int ngcj = (int) ceil(3*
a / 
hylen) - 1;
  4266       int ngck = (int) ceil(3*
a / 
hzlen) - 1;
  4268       int nhalf = (int) sqrt((
double)ni * nj * nk);
  4269       lastnelems = (nhalf > omega3 ? nhalf : omega3);
  4270       smallestnbox = ngci * ngcj * ngck;  
  4274 #ifdef DEBUG_MSM_VERBOSE  4275   printf(
"maxlevels = %d\n", maxlevels);
  4290     map.
gridrange[level].setbounds(ia, ib, ja, jb, ka, kb);
  4294     if (++level == 
nlevels)  done |= 0x07;  
  4297       int nelems = ni * nj * nk;
  4298       if (nelems <= lastnelems)    done |= 0x07;
  4299       if (nelems <= smallestnbox)  done |= 0x07;
  4302     alldone = (done == 0x07);  
  4307       if (ni & 1)        done |= 0x07;  
  4308       else if (ni == 2)  done |= 0x01;  
  4311       ia = -((-ia+1)/2) - 
s_edge;
  4314       if (ni <= 
omega)   done |= 0x01;  
  4320       if (nj & 1)        done |= 0x07;  
  4321       else if (nj == 2)  done |= 0x02;  
  4324       ja = -((-ja+1)/2) - 
s_edge;
  4327       if (nj <= 
omega)   done |= 0x02;  
  4333       if (nk & 1)        done |= 0x07;  
  4334       else if (nk == 2)  done |= 0x04;  
  4337       ka = -((-ka+1)/2) - 
s_edge;
  4340       if (nk <= 
omega)   done |= 0x04;  
  4342   } 
while ( ! alldone );
  4353   if (CkMyPe() == 0) {
  4355     for (n = 0;  n < 
nlevels;  n++) {
  4357       snprintf(s, 
sizeof(s), 
"    level %d:  "  4358           "[%d..%d] x [%d..%d] x [%d..%d]\n", n,
  4413   s = (
hv * pv) / (pv * pv);
  4417   s = (
hw * pw) / (pw * pw);
  4421   ni = (int) ceil(2*
a / pu.length() ) - 1;
  4422   nj = (int) ceil(2*
a / pv.length() ) - 1;
  4423   nk = (int) ceil(2*
a / pw.length() ) - 1;
  4426   Float scaling_factor = 0.5f;
  4430     a_p = a_p * a_p * a_p;   
  4432     scaling_factor = 1.f/64;  
  4440     for (level = 0;  level < toplevel;  level++) {
  4441       map.
gc[level].setbounds(-ni, ni, -nj, nj, -nk, nk);
  4442       map.
gvc[level].setbounds(-ni, ni, -nj, nj, -nk, nk);
  4444       for (k = -nk;  k <= nk;  k++) {
  4445         for (j = -nj;  j <= nj;  j++) {
  4446           for (i = -ni;  i <= ni;  i++) {
  4448               BigReal s, t, gs=0, gt=0, g=0, dgs=0, dgt=0, dg=0;
  4461                     gs = s_1 * s_1 * s_1;  
  4463                     dgs = -6 * gs * s_1;
  4473                 g = (gs - scaling_factor * gt) * a_p;
  4476                   c = a_p * a_1 / vlen;
  4478                 dg = 0.5 * (dgs - 0.5*scaling_factor * dgt) * 
c;
  4487               map.
gc[level](i,j,k) = scaling * 
map.
gc[0](i,j,k);
  4494       scaling *= scaling_factor;
  4501     for (i = 0;  i < 
VMAX;  i++) { 
virial[i] = 0; }
  4510       map.
gc[toplevel].setbounds(-ni, ni, -nj, nj, -nk, nk);
  4514       for (k = -nk;  k <= nk;  k++) {
  4515         for (j = -nj;  j <= nj;  j++) {
  4516           for (i = -ni;  i <= ni;  i++) {
  4523                 gs = s_1 * s_1 * s_1;  
  4533             map.
gc[toplevel](i,j,k) = scaling * 
Float(gs * a_p);
  4543     for (k = 0;  k < nstencil;  k++) {
  4544       for (j = 0;  j < nstencil;  j++) {
  4545         for (i = 0;  i < nstencil;  i++) {
  4546           map.
grespro(i,j,k) = phi[i] * phi[j] * phi[k];
  4558     for (level = 0;  level < toplevel;  level++) {
  4567       for (k = -nk;  k <= nk;  k++) {
  4568         for (j = -nj;  j <= nj;  j++) {
  4569           for (i = -ni;  i <= ni;  i++) {
  4601       for (k = -nk;  k <= nk;  k++) {
  4602         for (j = -nj;  j <= nj;  j++) {
  4603           for (i = -ni;  i <= ni;  i++) {
  4626     const double  PHI0[ND] = { 0.5, 1, 0.5 };
  4627     const double DPHI0[ND] = { 1.5, 0, -1.5 };
  4628     const double  PHI1[ND] = { -0.125, 0, 0.125 };
  4629     const double DPHI1[ND] = { -0.25, 1, -0.25 };
  4632     double  xphi0_base_array[ND];
  4633     double dxphi0_base_array[ND];
  4634     double  yphi0_base_array[ND];
  4635     double dyphi0_base_array[ND];
  4636     double  zphi0_base_array[ND];
  4637     double dzphi0_base_array[ND];
  4638     double  xphi1_base_array[ND];
  4639     double dxphi1_base_array[ND];
  4640     double  yphi1_base_array[ND];
  4641     double dyphi1_base_array[ND];
  4642     double  zphi1_base_array[ND];
  4643     double dzphi1_base_array[ND];
  4645     double *xphi0, *dxphi0, *xphi1, *dxphi1;
  4646     double *yphi0, *dyphi0, *yphi1, *dyphi1;
  4647     double *zphi0, *dzphi0, *zphi1, *dzphi1;
  4649     for (n = 0;  n < ND;  n++) {
  4650       xphi0_base_array[n]  = PHI0[n];
  4651       dxphi0_base_array[n] = 
hxlen_1 * DPHI0[n];  
  4652       xphi1_base_array[n]  = 
hxlen * PHI1[n];     
  4653       dxphi1_base_array[n] = DPHI1[n];
  4654       yphi0_base_array[n]  = PHI0[n];
  4655       dyphi0_base_array[n] = 
hylen_1 * DPHI0[n];  
  4656       yphi1_base_array[n]  = 
hylen * PHI1[n];     
  4657       dyphi1_base_array[n] = DPHI1[n];
  4658       zphi0_base_array[n]  = PHI0[n];
  4659       dzphi0_base_array[n] = 
hzlen_1 * DPHI0[n];  
  4660       zphi1_base_array[n]  = 
hzlen * PHI1[n];     
  4661       dzphi1_base_array[n] = DPHI1[n];
  4663     xphi0  =  xphi0_base_array + NR;  
  4664     dxphi0 = dxphi0_base_array + NR;
  4665     xphi1  =  xphi1_base_array + NR;
  4666     dxphi1 = dxphi1_base_array + NR;
  4667     yphi0  =  yphi0_base_array + NR;
  4668     dyphi0 = dyphi0_base_array + NR;
  4669     yphi1  =  yphi1_base_array + NR;
  4670     dyphi1 = dyphi1_base_array + NR;
  4671     zphi0  =  zphi0_base_array + NR;
  4672     dzphi0 = dzphi0_base_array + NR;
  4673     zphi1  =  zphi1_base_array + NR;
  4674     dzphi1 = dzphi1_base_array + NR;
  4676     for (level = 0;  level < 
nlevels-1;  level++) {
  4686       for (n = -NR;  n <= NR;  n++) {
  4706       for (k = -NR;  k <= NR;  k++) {
  4707         for (j = -NR;  j <= NR;  j++) {
  4708           for (i = -NR;  i <= NR;  i++) {
  4788       for (k = -NR;  k <= NR;  k++) {
  4789         for (j = -NR;  j <= NR;  j++) {
  4790           for (i = -NR;  i <= NR;  i++) {
  4878   if (CkMyPe() == 0) {
  4879     iout << 
iINFO << 
"MSM finished calculating stencils\n" << 
endi;
  4886 #ifdef DEBUG_MSM_VERBOSE  4887   printf(
"numPatches = %d\n", numpatches);
  4895 #ifdef MSM_FOLD_FACTOR  4898   for (level = 0;  level < 
nlevels;  level++) {
  4917 #ifdef MSM_DEBUG_VERBOSE  4918       if (CkMyPe() == 0) {
  4919         printf(
"level = %d\n  map.bs* = %d %d %d  gn* = %d %d %d\n",
  4925       int bni = (gni / 
map.
bsx[level]) + (gni % 
map.
bsx[level] != 0);
  4926       int bnj = (gnj / 
map.
bsy[level]) + (gnj % 
map.
bsy[level] != 0);
  4927       int bnk = (gnk / 
map.
bsz[level]) + (gnk % 
map.
bsz[level] != 0);
  4928 #ifdef MSM_FOLD_FACTOR  4929       if ( (bni == 1 || bnj == 1 || bnk == 1)) {
  4932         if (CkMyPe() == 0) {
  4933           printf(
"Setting MSM FoldFactor level %d:  %d %d %d\n",
  4934               level, bsx / gni, bsy / gnj, bsz / gnk);
  4939       b.
set(0, bni, 0, bnj, 0, bnk);
  4940       for (k = 0;  k < bnk;  k++) {
  4941         for (j = 0;  j < bnj;  j++) {
  4942           for (i = 0;  i < bni;  i++) {
  4944             int ia = gia + i*
map.
bsx[level];
  4945             int ib = ia + 
map.
bsx[level] - 1;
  4946             int ja = gja + j*
map.
bsy[level];
  4947             int jb = ja + 
map.
bsy[level] - 1;
  4948             int ka = gka + k*
map.
bsz[level];
  4949             int kb = ka + 
map.
bsz[level] - 1;
  4950             if (ib >= gia + gni) ib = gia + gni - 1;
  4951             if (jb >= gja + gnj) jb = gja + gnj - 1;
  4952             if (kb >= gka + gnk) kb = gka + gnk - 1;
  4953             b(i,j,k).nrange.
setbounds(ia, ib, ja, jb, ka, kb);
  4972 #ifdef DEBUG_MSM_VERBOSE  4973   printf(
"Done allocating map for grid levels\n");
  4974   printf(
"Grid level decomposition:\n");
  4975   for (level = 0;  level < 
nlevels;  level++) {
  4983     for (k = bka;  k <= bkb;  k++) {
  4984       for (j = bja;  j <= bjb;  j++) {
  4985         for (i = bia;  i <= bib;  i++) {
  4986           int ia = b(i,j,k).nrange.
ia();
  4987           int ib = b(i,j,k).nrange.
ib();
  4988           int ja = b(i,j,k).nrange.
ja();
  4989           int jb = b(i,j,k).nrange.
jb();
  4990           int ka = b(i,j,k).nrange.
ka();
  4991           int kb = b(i,j,k).nrange.
kb();
  4992           printf(
"level=%d  id=%d %d %d  [%d..%d] x [%d..%d] x [%d..%d]"  4994               level, i, j, k, ia, ib, ja, jb, ka, kb,
  4995               b(i,j,k).nrange.
nn());
  5001   if (CkMyPe() == 0) {
  5002     iout << 
iINFO << 
"MSM finished creating map for grid levels\n" << 
endi;
  5009 void ComputeMsmMgr::initialize2()
  5014   int i, j, k, n, level;
  5022   BigReal xmargin = 0.5 * (patchdim - 
a) / sysdima;
  5023   BigReal ymargin = 0.5 * (patchdim - 
a) / sysdimb;
  5024   BigReal zmargin = 0.5 * (patchdim - 
a) / sysdimc;
  5037   for (pid = 0;  pid < numpatches;  pid++) {
  5076     if (ia < ia_min)  ia = ia_min;
  5077     if (ib > ib_max)  ib = ib_max;
  5078     if (ja < ja_min)  ja = ja_min;
  5079     if (jb > jb_max)  jb = jb_max;
  5080     if (ka < ka_min)  ka = ka_min;
  5081     if (kb > kb_max)  kb = kb_max;
  5090     if (mi == 0)      ia = ia_min;
  5091     if (mi == npi-1)  ib = ib_max;
  5092     if (mj == 0)      ja = ja_min;
  5093     if (mj == npj-1)  jb = jb_max;
  5094     if (mk == 0)      ka = ka_min;
  5095     if (mk == npk-1)  kb = kb_max;
  5098     printf(
"patch %d:  grid covering:  [%d..%d] x [%d..%d] x [%d..%d]\n",
  5099         pid, ia, ib, ja, jb, ka, kb);
  5107     int maxarrlen = (bupper.
n.
i - blower.
n.
i + 1) *
  5108       (bupper.
n.
j - blower.
n.
j + 1) * (bupper.
n.
k - blower.
n.
k + 1);
  5109     p.
send.setmax(maxarrlen);  
  5112     printf(
"blower: level=%d  n=%d %d %d   bupper: level=%d  n=%d %d %d\n",
  5114         bupper.
level, bupper.
n.
i, bupper.
n.
j, bupper.
n.
k);
  5117     for (
int kk = blower.
n.
k;  kk <= bupper.
n.
k;  kk++) {
  5118       for (
int jj = blower.
n.
j;  jj <= bupper.
n.
j;  jj++) {
  5119         for (
int ii = blower.
n.
i;  ii <= bupper.
n.
i;  ii++) {
  5121           printf(
"ii=%d  jj=%d  kk=%d\n", ii, jj, kk);
  5147 #ifdef DEBUG_MSM_VERBOSE  5148 if (CkMyPe() == 0) {
  5149   printf(
"Done allocating map for patches\n");
  5150   printf(
"Patch level decomposition:\n");
  5151   for (pid = 0;  pid < numpatches;  pid++) {
  5159     printf(
"patch id=%d  [%d..%d] x [%d..%d] x [%d..%d]\n",
  5160         pid, ia, ib, ja, jb, ka, kb);
  5164   if (CkMyPe() == 0) {
  5165     iout << 
iINFO << 
"MSM finished creating map for patches\n" << 
endi;
  5171   for (level = 0;  level < 
nlevels;  level++) {
  5176 #ifdef MSM_SKIP_BEYOND_SPHERE  5177     int gia, gib, gja, gjb, gka, gkb;
  5187       gia = 
map.
gc[level].ia();
  5188       gib = 
map.
gc[level].ib();
  5189       gja = 
map.
gc[level].ja();
  5190       gjb = 
map.
gc[level].jb();
  5191       gka = 
map.
gc[level].ka();
  5192       gkb = 
map.
gc[level].kb();
  5195 #ifdef MSM_SKIP_TOO_DISTANT_BLOCKS  5196     int bsx = 
map.
bsx[level];
  5197     int bsy = 
map.
bsy[level];
  5198     int bsz = 
map.
bsz[level];
  5200 #ifdef MSM_FOLD_FACTOR  5207     for (k = 0;  k < bnk;  k++) {
  5208       for (j = 0;  j < bnj;  j++) {
  5209         for (i = 0;  i < bni;  i++) {
  5212           int ia = b(i,j,k).nrange.
ia();
  5213           int ib = b(i,j,k).nrange.
ib();
  5214           int ja = b(i,j,k).nrange.
ja();
  5215           int jb = b(i,j,k).nrange.
jb();
  5216           int ka = b(i,j,k).nrange.
ka();
  5217           int kb = b(i,j,k).nrange.
kb();
  5227             ia += 
map.
gc[level].ia();
  5228             ib += 
map.
gc[level].ib();
  5229             ja += 
map.
gc[level].ja();
  5230             jb += 
map.
gc[level].jb();
  5231             ka += 
map.
gc[level].ka();
  5232             kb += 
map.
gc[level].kb();
  5238 #ifdef MSM_FOLD_FACTOR  5245           int maxarrlen = (bupper.
n.
i - blower.
n.
i + 1) *
  5246             (bupper.
n.
j - blower.
n.
j + 1) * (bupper.
n.
k - blower.
n.
k + 1);
  5247           b(i,j,k).sendAcross.setmax(maxarrlen);  
  5248           b(i,j,k).indexGridCutoff.setmax(maxarrlen);  
  5249           b(i,j,k).recvGridCutoff.setmax(maxarrlen);  
  5255             printf(
"ME %4d   [%d..%d] x [%d..%d] x [%d..%d]\n",
  5262           for (kk = blower.
n.
k;  kk <= bupper.
n.
k;  kk++) {
  5263             for (jj = blower.
n.
j;  jj <= bupper.
n.
j;  jj++) {
  5264               for (ii = blower.
n.
i;  ii <= bupper.
n.
i;  ii++) {
  5265 #ifdef MSM_SKIP_TOO_DISTANT_BLOCKS  5267                 int si = 
sign(ii-i);
  5268                 int sj = 
sign(jj-j);
  5269                 int sk = 
sign(kk-k);
  5270                 int di = (ii-i)*bsx + si*(1-bsx);
  5271                 int dj = (jj-j)*bsy + sj*(1-bsy);
  5272                 int dk = (kk-k)*bsz + sk*(1-bsz);
  5280 #ifdef MSM_FOLD_FACTOR  5282                     b(i,j,k).nrangeCutoff);
  5286                     b(i,j,k).nrangeCutoff);
  5289 #ifdef MSM_SKIP_BEYOND_SPHERE  5291                 printf(
"send to volume %4d   [%d..%d] x [%d..%d] x [%d..%d]\n",
  5305                 int inc_in = (bn.
ni() > 1 ? bn.
ni()-1 : 1);
  5306                 int inc_jn = (bn.
nj() > 1 ? bn.
nj()-1 : 1);
  5307                 int inc_kn = (bn.
nk() > 1 ? bn.
nk()-1 : 1);
  5310                 for (
int kn = bn.
ka();  kn <= bn.
kb();  kn += inc_kn) {
  5311                   for (
int jn = bn.
ja();  jn <= bn.
jb();  jn += inc_jn) {
  5312                     for (
int in = bn.
ia();  in <= bn.
ib();  in += inc_in) {
  5314                       int mia = ( qia >= gia + in ? qia : gia + in );
  5315                       int mib = ( qib <= gib + in ? qib : gib + in );
  5316                       int mja = ( qja >= gja + jn ? qja : gja + jn );
  5317                       int mjb = ( qjb <= gjb + jn ? qjb : gjb + jn );
  5318                       int mka = ( qka >= gka + kn ? qka : gka + kn );
  5319                       int mkb = ( qkb <= gkb + kn ? qkb : gkb + kn );
  5320                       int inc_im = (mib-mia > 0 ? mib-mia : 1);
  5321                       int inc_jm = (mjb-mja > 0 ? mjb-mja : 1);
  5322                       int inc_km = (mkb-mka > 0 ? mkb-mka : 1);
  5325                       for (
int km = mka;  km <= mkb;  km += inc_km) {
  5326                         for (
int jm = mja;  jm <= mjb;  jm += inc_jm) {
  5327                           for (
int im = mia;  im <= mib;  im += inc_im) {
  5334                               g = 
map.
gc[level](im-in,jm-jn,km-kn);
  5349                 b(i,j,k).sendAcross.append(bs);
  5363             int ia2, ib2, ja2, jb2, ka2, kb2;
  5364             ia = b(i,j,k).nrange.
ia();
  5365             ib = b(i,j,k).nrange.
ib();
  5366             ja = b(i,j,k).nrange.
ja();
  5367             jb = b(i,j,k).nrange.
jb();
  5368             ka = b(i,j,k).nrange.
ka();
  5369             kb = b(i,j,k).nrange.
kb();
  5371             if ( ia==ib && ((ia & 1)==0) ) {
  5375               ia2 = (ia / 2) - ((polydeg+1) / 2) + 1;
  5376               ib2 = ((ib+1) / 2) + ((polydeg+1) / 2) - 1;
  5378             if ( ja==jb && ((ja & 1)==0) ) {
  5382               ja2 = (ja / 2) - ((polydeg+1) / 2) + 1;
  5383               jb2 = ((jb+1) / 2) + ((polydeg+1) / 2) - 1;
  5385             if ( ka==kb && ((ka & 1)==0) ) {
  5389               ka2 = (ka / 2) - ((polydeg+1) / 2) + 1;
  5390               kb2 = ((kb+1) / 2) + ((polydeg+1) / 2) - 1;
  5396             b(i,j,k).nrangeRestricted.
setbounds(na2.
i, nb2.
i, na2.
j, nb2.
j,
  5401             int maxarrlen = (bupper.
n.
i - blower.
n.
i + 1) *
  5402               (bupper.
n.
j - blower.
n.
j + 1) * (bupper.
n.
k - blower.
n.
k + 1);
  5403             b(i,j,k).sendUp.setmax(maxarrlen);  
  5406             for (kk = blower.
n.
k;  kk <= bupper.
n.
k;  kk++) {
  5407               for (jj = blower.
n.
j;  jj <= bupper.
n.
j;  jj++) {
  5408                 for (ii = blower.
n.
i;  ii <= bupper.
n.
i;  ii++) {
  5414                       b(i,j,k).nrangeRestricted);
  5416                   b(i,j,k).sendUp.append(bs);
  5427             int ia2 = b(i,j,k).nrange.
ia();
  5428             int ib2 = b(i,j,k).nrange.
ib();
  5429             int ja2 = b(i,j,k).nrange.
ja();
  5430             int jb2 = b(i,j,k).nrange.
jb();
  5431             int ka2 = b(i,j,k).nrange.
ka();
  5432             int kb2 = b(i,j,k).nrange.
kb();
  5434             ia = 2*ia2 - polydeg;
  5435             ib = 2*ib2 + polydeg;
  5436             ja = 2*ja2 - polydeg;
  5437             jb = 2*jb2 + polydeg;
  5438             ka = 2*ka2 - polydeg;
  5439             kb = 2*kb2 + polydeg;
  5444             b(i,j,k).nrangeProlongated.
setbounds(na.
i, nb.
i, na.
j, nb.
j,
  5449             int maxarrlen = (bupper.
n.
i - blower.
n.
i + 1) *
  5450               (bupper.
n.
j - blower.
n.
j + 1) * (bupper.
n.
k - blower.
n.
k + 1);
  5451             b(i,j,k).sendDown.setmax(maxarrlen);  
  5454             for (kk = blower.
n.
k;  kk <= bupper.
n.
k;  kk++) {
  5455               for (jj = blower.
n.
j;  jj <= bupper.
n.
j;  jj++) {
  5456                 for (ii = blower.
n.
i;  ii <= bupper.
n.
i;  ii++) {
  5462                       b(i,j,k).nrangeProlongated);
  5464                   b(i,j,k).sendDown.append(bs);
  5473 #ifdef MSM_REDUCE_GRID  5476           b(i,j,k).numRecvsPotential -= ( b(i,j,k).indexGridCutoff.len() - 1 );
  5501 #ifdef DEBUG_MSM_VERBOSE  5502     printf(
"Allocating patchPtr array length %d\n", pm->
numPatches());
  5504     if (CkMyPe() == 0) {
  5506                     << 
" interpolation / anterpolation objects"  5507                     << 
" (one per patch)\n" << 
endi;
  5511 #ifdef MSM_NODE_MAPPING  5531     int numNodes = CkNumNodes();
  5532     int numPes = CkNumPes();
  5534     int numPesPerNode = numPes / numNodes;
  5536     for (level = 0;  level < 
nlevels;  level++) {
  5551 #ifdef MSM_NODE_MAPPING_STATS  5557     for (n = 0;  n < numNodes;  n++) {
  5562     for (level = 0;  level < 
nlevels;  level++) {
  5567       for (k = 0;  k < bnk;  k++) { 
  5568         for (j = 0;  j < bnj;  j++) {
  5569           for (i = 0;  i < bni;  i++) {
  5571             nodeQueue.remove(wn);
  5574             nodeWork[wn.
index] += bw;
  5576             blockWork[bindex] = bw;
  5577             nodeQueue.insert(wn);
  5585     for (n = 0;  n < numBlocks;  n++) {
  5587       nodeQueue.remove(wn);
  5590       nodeWork[wn.
index] += bw;
  5593       nodeQueue.insert(wn);
  5600     for (level = 0;  level < 
nlevels;  level++) { 
  5605       for (k = 0;  k < bnk;  k++) { 
  5606         for (j = 0;  j < bnj;  j++) {
  5607           for (i = 0;  i < bni;  i++) {
  5610             int numSendAcross = b(i,j,k).sendAcross.len();
  5611             ASSERT( numSendAcross == b(i,j,k).indexGridCutoff.len() );
  5612             for (n = 0;  n < numSendAcross;  n++) {
  5618               if (nodeWork[nsrc] <= nodeWork[ndest]) {
  5620                 nodeWork[nsrc] += gcutWork[gindex];
  5624                 nodeWork[ndest] += gcutWork[gindex];
  5634     for (n = 0;  n < numNodes;  n++) {
  5635       peQueue[n].init(numPesPerNode);
  5636       for (
int poff = 0;  poff < numPesPerNode;  poff++) {
  5637         peQueue[n].insert(
WorkIndex(0, n*numPesPerNode + poff));
  5641     for (n = 0;  n < numBlocks;  n++) {
  5644       peQueue[node].remove(wn);
  5646       wn.
work += blockWork[n];
  5647       peQueue[node].insert(wn);
  5648 #ifdef MSM_NODE_MAPPING_STATS  5649       peWork[wn.
index] += blockWork[n];
  5656       peQueue[node].remove(wn);
  5658       wn.
work += gcutWork[n];
  5659       peQueue[node].insert(wn);
  5660 #ifdef MSM_NODE_MAPPING_STATS  5661       peWork[wn.
index] += gcutWork[n];
  5665 #ifdef MSM_NODE_MAPPING_STATS  5666     if (CkMyPe() == 0) {
  5667       printf(
"Mapping of MSM work (showing scaled estimated work units):\n");
  5668       for (n = 0;  n < numNodes;  n++) {
  5669         printf(
"    node %d   work %8.3f\n", n, nodeWork[n]);
  5670         for (
int poff = 0;  poff < numPesPerNode;  poff++) {
  5671           int p = n*numPesPerNode + poff;
  5672           printf(
"        pe %d     work %8.3f\n", p, peWork[p]);
  5681     for (level = 0;  level < 
nlevels;  level++) {
  5689     nodecnt.resize(numNodes);
  5694     int r = numBlocks % numNodes;
  5695     int q = numBlocks / numNodes;
  5697     for (n = 0;  n < numNodes - r;  n++) {
  5698       int moffset = n * q;
  5699       for (
int m = 0;  m < q;  m++) {
  5704     for ( ;  n < numNodes;  n++) {
  5705       int moffset = (numNodes - r)*q + (n - (numNodes - r))*qp;
  5706       for (
int m = 0;  m < qp;  m++) {
  5712     if (CkMyPe() == 0) {
  5713       CkPrintf(
"%d objects to %d nodes\n", q, numNodes-r);
  5715         CkPrintf(
"%d objects to %d nodes\n", qp, r);
  5717       CkPrintf(
"%d  =?  %d\n", (numNodes-r)*q + r*qp, numBlocks);
  5724     for (level = 0;  level < 
nlevels;  level++) { 
  5729       for (k = 0;  k < bnk;  k++) { 
  5730         for (j = 0;  j < bnj;  j++) {
  5731           for (i = 0;  i < bni;  i++) {
  5734             int numSendAcross = b(i,j,k).sendAcross.len();
  5735             ASSERT( numSendAcross == b(i,j,k).indexGridCutoff.len() );
  5736             for (n = 0;  n < numSendAcross;  n++) {
  5741               if (nodecnt[nsrc] <= nodecnt[ndest]) {
  5758     int ppn = numPes / numNodes;  
  5760     for (n = 0;  n < numNodes;  n++)  nodecnt[n] = 0;
  5761     for (n = 0;  n < numBlocks;  n++) {
  5765       if (nodecnt[node] >= ppn)  nodecnt[node] = 0;  
  5771       if (nodecnt[node] >= ppn)  nodecnt[node] = 0;  
  5776     if (CkMyPe() == 0) {
  5777       for (n = 0;  n < numBlocks;  n++) {
  5778         CkPrintf(
"block %d:   node=%d  pe=%d\n",
  5783         CkPrintf(
"grid cutoff %d:   node=%d  pe=%d\n",
  5793 #endif // MSM_NODE_MAPPING  5799   int i, j, k, n, level;
  5801   if (CkMyPe() == 0) {
  5811     for (level = 0;  level < 
nlevels;  level++) {
  5815 #ifdef MSM_NODE_MAPPING  5816       CkPrintf(
"Using MsmBlockMap for level %d\n", level);
  5817       CProxy_MsmBlockMap blockMap = CProxy_MsmBlockMap::ckNew(level);
  5818       CkArrayOptions opts(ni, nj, nk);
  5819       opts.setMap(blockMap);
  5822           CProxy_MsmC1HermiteBlock::ckNew(level, opts);
  5825         msmBlock[level] = CProxy_MsmBlock::ckNew(level, opts);
  5830           CProxy_MsmC1HermiteBlock::ckNew(level, ni, nj, nk);
  5833         msmBlock[level] = CProxy_MsmBlock::ckNew(level, ni, nj, nk);
  5836 #ifdef DEBUG_MSM_VERBOSE  5837       printf(
"Create MsmBlock[%d] 3D chare array ( %d x %d x %d )\n",
  5841       int nijk = ni * nj * nk;
  5842       sprintf(msg, 
"MSM grid level %d decomposed into %d block%s"  5843           " ( %d x %d x %d )\n",
  5844           level, nijk, (nijk==1 ? 
"" : 
"s"), ni, nj, nk);
  5850       msmProxy.recvMsmC1HermiteBlockProxy(msg);  
  5858 #ifdef MSM_GRID_CUTOFF_DECOMP  5861 #ifdef MSM_NODE_MAPPING  5862     CkPrintf(
"Using MsmGridCutoffMap\n");
  5863     CProxy_MsmGridCutoffMap gcutMap = CProxy_MsmGridCutoffMap::ckNew();
  5865     optsgcut.setMap(gcutMap);
  5885       msmProxy.recvMsmC1HermiteGridCutoffProxy(gcmsg);
  5890       msmProxy.recvMsmGridCutoffProxy(gcmsg);
  5896     for (level = 0;  level < 
nlevels;  level++) { 
  5901       for (k = 0;  k < bnk;  k++) { 
  5902         for (j = 0;  j < bnj;  j++) {
  5903           for (i = 0;  i < bni;  i++) {
  5906             int numSendAcross = b(i,j,k).sendAcross.len();
  5907             ASSERT( numSendAcross == b(i,j,k).indexGridCutoff.len() );
  5909             for (n = 0;  n < numSendAcross;  n++) {
  5911               int index = b(i,j,k).indexGridCutoff[n];
  5927     iout << 
iINFO << 
"MSM grid cutoff calculation decomposed into "  5933 #ifdef DEBUG_MSM_VERBOSE  5934   printf(
"end of initialization\n");
  5969 #ifdef DEBUG_MSM_VERBOSE  5970   printf(
"ComputeMsmMgr:  update() PE %d\n", CkMyPe());
  5975   if (CkMyPe() == 0) {
  5976     for (
int level = 0;  level < 
nlevels;  level++) {
  5992 #ifdef DEBUG_MSM_VERBOSE  5993   printf(
"ComputeMsmMgr:  compute() PE=%d\n", CkMyPe());
  5997   for (n = 0;  n < patchIDList.
len();  n++) {
  5998     int patchID = patchIDList[n];
  6001       snprintf(msg, 
sizeof(msg),
  6002           "Expected MSM data for patch %d does not exist on PE %d",
  6007       patchPtr[patchID]->anterpolationC1Hermite();
  6010       patchPtr[patchID]->anterpolation();
  6031     snprintf(msg, 
sizeof(msg), 
"Expecting patch %d to exist on PE %d",
  6059   CProxy_ComputeMsmMgr::ckLocalBranch(
  6060       CkpvAccess(BOCclass_group).computeMsmMgr)->setCompute(
this);
  6064 #ifdef DEBUG_MSM_VERBOSE  6065   printf(
"ComputeMsm:  (constructor) PE=%d\n", CkMyPe());
  6072 #ifdef DEBUG_MSM_VERBOSE  6073   printf(
"ComputeMsm:  (destructor) PE=%d\n", CkMyPe());
  6080 #ifdef DEBUG_MSM_VERBOSE  6081   printf(
"ComputeMsm:  doWork() PE=%d\n", CkMyPe());
  6086   myMgr->initTiming();
  6088 #ifdef MSM_PROFILING  6089   myMgr->initProfiling();
  6096   cntLocalPatches = 0;
  6097   ASSERT(cntLocalPatches < numLocalPatches);
  6099 #ifdef DEBUG_MSM_VERBOSE  6104   if ( ! 
patchList[0].p->flags.doFullElectrostatics ) {
  6105     for (ap = ap.
begin();  ap != ap.
end();  ap++) {
  6106       CompAtom *x = (*ap).positionBox->open();
  6107       Results *r = (*ap).forceBox->open();
  6108       (*ap).positionBox->close(&x);
  6109       (*ap).forceBox->close(&r);
  6122   for (ap = ap.
begin();  ap != ap.
end();  ap++) {
  6123     CompAtom *x = (*ap).positionBox->open();
  6124     CompAtomExt *xExt = (*ap).p->getCompAtomExtInfo();
  6126       (*ap).positionBox->close(&x);
  6127       x = (*ap).avgPositionBox->open();
  6129     int numAtoms = (*ap).p->getNumAtoms();
  6130     int patchID = (*ap).patchID;
  6131     patchIDList.
append(patchID);
  6132     if (patchPtr[patchID] == NULL) {
  6135 #ifdef DEBUG_MSM_VERBOSE  6136       printf(
"Creating new PatchData:  patchID=%d  PE=%d\n",
  6141     patch.
init(numAtoms);
  6144     for (n = 0;  n < numAtoms;  n++) {
  6146       coord[n].charge = qscaling * x[n].
charge;
  6147       coord[n].id = xExt[n].
id;
  6150       (*ap).avgPositionBox->close(&x);
  6153       (*ap).positionBox->close(&x);
  6163   if (++cntLocalPatches != numLocalPatches) 
return;
  6168   for (ap = ap.
begin();  ap != ap.
end();  ap++) {
  6169     int patchID = (*ap).patchID;
  6178 #ifdef DEBUG_MSM_VERBOSE  6179   printf(
"ComputeMsm:  saveResults() PE=%d\n", CkMyPe());
  6186   for (ap = ap.
begin(); ap != ap.
end(); ap++) {
  6187     Results *r = (*ap).forceBox->open();
  6189     int numAtoms = (*ap).p->getNumAtoms();
  6190     int patchID = (*ap).patchID;
  6191     if (patchPtr[patchID] == NULL) {
  6193       snprintf(msg, 
sizeof(msg), 
"Expecting patch %d to exist on PE %d",
  6199     for (n = 0;  n < numAtoms;  n++) {
  6200       f[n] += patch.
force[n];
  6202     (*ap).forceBox->close(&r);
  6258     for (
int i = 0;  i < natoms;  i++)  
force[i] = 0;
  6270 #ifdef DEBUG_MSM_GRID  6271     printf(
"patchID %d:  anterpolation\n", 
patchID);
  6275     double startTime, stopTime;
  6276     startTime = CkWallTimer();
  6278 #ifndef MSM_COMM_ONLY  6286     const int ia = 
qh.
ia();
  6287     const int ib = 
qh.
ib();
  6288     const int ja = 
qh.
ja();
  6289     const int jb = 
qh.
jb();
  6290     const int ka = 
qh.
ka();
  6291     const int kb = 
qh.
kb();
  6292     const int ni = 
qh.
ni();
  6293     const int nj = 
qh.
nj();
  6297     for (
int n = 0;  n < 
coord.
len();  n++) {
  6307       BigReal xlo = floor(sx_hx) - rs_edge;
  6308       BigReal ylo = floor(sy_hy) - rs_edge;
  6309       BigReal zlo = floor(sz_hz) - rs_edge;
  6324       int iswithin = ( ia <= ilo && (ilo+(s_size-1)) <= ib &&
  6325                        ja <= jlo && (jlo+(s_size-1)) <= jb &&
  6326                        ka <= klo && (klo+(s_size-1)) <= kb );
  6330         printf(
"PE %d:  atom %d:  pos= %g %g %g  patchID=%d\n",
  6331             CkMyPe(), 
coord[n].
id,
  6334         printf(
"PE %d:  atom subgrid [%d..%d] x [%d..%d] x [%d..%d]\n",
  6336             ilo, ilo+s_size-1, jlo, jlo+s_size-1, klo, klo+s_size-1);
  6337         printf(
"PE %d:  patch grid [%d..%d] x [%d..%d] x [%d..%d]\n",
  6339             ia, ib, ja, jb, ka, kb);
  6342         snprintf(msg, 
sizeof(msg), 
"Atom %d is outside of the MSM grid.",
  6348       for (
int k = 0;  k < s_size;  k++) {
  6349         int koff = ((k+klo) - ka) * nj;
  6350         Float ck = zphi[k] * q;
  6351         for (
int j = 0;  j < s_size;  j++) {
  6352           int jkoff = (koff + (j+jlo) - ja) * ni;
  6353           Float cjk = yphi[j] * ck;
  6354           for (
int i = 0;  i < s_size;  i++) {
  6355             int ijkoff = jkoff + (i+ilo) - ia;
  6356             qhbuffer[ijkoff] += xphi[i] * cjk;
  6362 #endif // !MSM_COMM_ONLY  6364     stopTime = CkWallTimer();
  6373     double startTime, stopTime;
  6380     for (
int n = 0;  n < 
pd->
send.len();  n++) {
  6382       startTime = CkWallTimer();
  6398       stopTime = CkWallTimer();
  6402           bindex.
n.
i, bindex.
n.
j, bindex.
n.
k).addCharge(gm);
  6408     double startTime, stopTime;
  6409     startTime = CkWallTimer();
  6413     stopTime = CkWallTimer();
  6422 #ifdef DEBUG_MSM_GRID  6423     printf(
"patchID %d:  interpolation\n", 
patchID);
  6427     double startTime, stopTime;
  6428     startTime = CkWallTimer();
  6430 #ifndef MSM_COMM_ONLY  6447     const int ia = 
eh.
ia();
  6448     const int ib = 
eh.
ib();
  6449     const int ja = 
eh.
ja();
  6450     const int jb = 
eh.
jb();
  6451     const int ka = 
eh.
ka();
  6452     const int kb = 
eh.
kb();
  6453     const int ni = 
eh.
ni();
  6454     const int nj = 
eh.
nj();
  6458     for (
int n = 0;  n < 
coord.
len();  n++) {
  6468       BigReal xlo = floor(sx_hx) - rs_edge;
  6469       BigReal ylo = floor(sy_hy) - rs_edge;
  6470       BigReal zlo = floor(sz_hz) - rs_edge;
  6488       int iswithin = ( ia <= ilo && (ilo+(s_size-1)) <= ib &&
  6489                        ja <= jlo && (jlo+(s_size-1)) <= jb &&
  6490                        ka <= klo && (klo+(s_size-1)) <= kb );
  6494         snprintf(msg, 
sizeof(msg), 
"Atom %d is outside of the MSM grid.",
  6503       Float fx=0, fy=0, fz=0, e=0;
  6504       for (
int k = 0;  k < s_size;  k++) {
  6505         int koff = ((k+klo) - ka) * nj;
  6506         for (
int j = 0;  j < s_size;  j++) {
  6507           int jkoff = (koff + (j+jlo) - ja) * ni;
  6508           Float cx = yphi[j] * zphi[k];
  6509           Float cy = dyphi[j] * zphi[k];
  6510           Float cz = yphi[j] * dzphi[k];
  6511           for (
int i = 0;  i < s_size;  i++) {
  6512             int ijkoff = jkoff + (i+ilo) - ia;
  6513             Float ec = ehbuffer[ijkoff];
  6514             fx += ec * dxphi[i] * cx;
  6515             fy += ec * xphi[i] * cy;
  6516             fz += ec * xphi[i] * cz;
  6517             e += ec * xphi[i] * cx;
  6527       force[n].x -= q * fx;
  6528       force[n].y -= q * fy;
  6529       force[n].z -= q * fz;
  6531       energy_self += q * q;
  6538 #endif // !MSM_COMM_ONLY  6540     stopTime = CkWallTimer();
  6548 #ifdef DEBUG_MSM_GRID  6549     printf(
"patchID %d:  anterpolationC1Hermite\n", 
patchID);
  6553     double startTime, stopTime;
  6554     startTime = CkWallTimer();
  6556 #ifndef MSM_COMM_ONLY  6557     Float xphi[2], xpsi[2];
  6558     Float yphi[2], ypsi[2];
  6559     Float zphi[2], zpsi[2];
  6576     for (
int n = 0;  n < 
coord.
len();  n++) {
  6603       int iswithin = ( ia <= ilo && ilo < ib &&
  6604                        ja <= jlo && jlo < jb &&
  6605                        ka <= klo && klo < kb );
  6609         snprintf(msg, 
sizeof(msg), 
"Atom %d is outside of the MSM grid.",
  6615       for (
int k = 0;  k < 2;  k++) {
  6616         int koff = ((k+klo) - ka) * nj;
  6617         Float c_zphi = zphi[k] * q;
  6618         Float c_zpsi = zpsi[k] * q;
  6619         for (
int j = 0;  j < 2;  j++) {
  6620           int jkoff = (koff + (j+jlo) - ja) * ni;
  6621           Float c_yphi_zphi = yphi[j] * c_zphi;
  6622           Float c_ypsi_zphi = ypsi[j] * c_zphi;
  6623           Float c_yphi_zpsi = yphi[j] * c_zpsi;
  6624           Float c_ypsi_zpsi = ypsi[j] * c_zpsi;
  6625           for (
int i = 0;  i < 2;  i++) {
  6626             int ijkoff = jkoff + (i+ilo) - ia;
  6627             qhbuffer[ijkoff].
velem[
D000] += xphi[i] * c_yphi_zphi;
  6628             qhbuffer[ijkoff].
velem[
D100] += xpsi[i] * c_yphi_zphi;
  6629             qhbuffer[ijkoff].
velem[
D010] += xphi[i] * c_ypsi_zphi;
  6630             qhbuffer[ijkoff].
velem[
D001] += xphi[i] * c_yphi_zpsi;
  6631             qhbuffer[ijkoff].
velem[
D110] += xpsi[i] * c_ypsi_zphi;
  6632             qhbuffer[ijkoff].
velem[
D101] += xpsi[i] * c_yphi_zpsi;
  6633             qhbuffer[ijkoff].
velem[
D011] += xphi[i] * c_ypsi_zpsi;
  6634             qhbuffer[ijkoff].
velem[
D111] += xpsi[i] * c_ypsi_zpsi;
  6641 #endif // !MSM_COMM_ONLY  6643     stopTime = CkWallTimer();
  6652     double startTime, stopTime;
  6656     for (
int n = 0;  n < 
pd->
send.len();  n++) {
  6658       startTime = CkWallTimer();
  6674       stopTime = CkWallTimer();
  6678           bindex.
n.
i, bindex.
n.
j, bindex.
n.
k).addCharge(gm);
  6684     double startTime, stopTime;
  6685     startTime = CkWallTimer();
  6689     stopTime = CkWallTimer();
  6698 #ifdef DEBUG_MSM_GRID  6699     printf(
"patchID %d:  interpolation\n", 
patchID);
  6703     double startTime, stopTime;
  6704     startTime = CkWallTimer();
  6706 #ifndef MSM_COMM_ONLY  6709     Float xphi[2], dxphi[2], xpsi[2], dxpsi[2];
  6710     Float yphi[2], dyphi[2], ypsi[2], dypsi[2];
  6711     Float zphi[2], dzphi[2], zpsi[2], dzpsi[2];
  6732     for (
int n = 0;  n < 
coord.
len();  n++) {
  6765       int iswithin = ( ia <= ilo && ilo < ib &&
  6766                        ja <= jlo && jlo < jb &&
  6767                        ka <= klo && klo < kb );
  6771         snprintf(msg, 
sizeof(msg), 
"Atom %d is outside of the MSM grid.",
  6778       Float fx=0, fy=0, fz=0, e=0;
  6779       for (
int k = 0;  k < 2;  k++) {
  6780         int koff = ((k+klo) - ka) * nj;
  6781         for (
int j = 0;  j < 2;  j++) {
  6782           int jkoff = (koff + (j+jlo) - ja) * ni;
  6783           Float c_yphi_zphi = yphi[j] * zphi[k];
  6784           Float c_ypsi_zphi = ypsi[j] * zphi[k];
  6785           Float c_yphi_zpsi = yphi[j] * zpsi[k];
  6786           Float c_ypsi_zpsi = ypsi[j] * zpsi[k];
  6787           Float c_yphi_dzphi = yphi[j] * dzphi[k];
  6788           Float c_ypsi_dzphi = ypsi[j] * dzphi[k];
  6789           Float c_yphi_dzpsi = yphi[j] * dzpsi[k];
  6790           Float c_ypsi_dzpsi = ypsi[j] * dzpsi[k];
  6791           Float c_dyphi_zphi = dyphi[j] * zphi[k];
  6792           Float c_dypsi_zphi = dypsi[j] * zphi[k];
  6793           Float c_dyphi_zpsi = dyphi[j] * zpsi[k];
  6794           Float c_dypsi_zpsi = dypsi[j] * zpsi[k];
  6795           for (
int i = 0;  i < 2;  i++) {
  6796             int ijkoff = jkoff + (i+ilo) - ia;
  6797             fx += dxphi[i] * (c_yphi_zphi * ehbuffer[ijkoff].
velem[
D000]
  6798                 + c_ypsi_zphi * ehbuffer[ijkoff].
velem[
D010]
  6799                 + c_yphi_zpsi * ehbuffer[ijkoff].
velem[
D001]
  6800                 + c_ypsi_zpsi * ehbuffer[ijkoff].
velem[
D011])
  6801               + dxpsi[i] * (c_yphi_zphi * ehbuffer[ijkoff].velem[
D100]
  6802                   + c_ypsi_zphi * ehbuffer[ijkoff].velem[
D110]
  6803                   + c_yphi_zpsi * ehbuffer[ijkoff].velem[
D101]
  6804                   + c_ypsi_zpsi * ehbuffer[ijkoff].velem[
D111]);
  6805             fy += xphi[i] * (c_dyphi_zphi * ehbuffer[ijkoff].
velem[
D000]
  6806                 + c_dypsi_zphi * ehbuffer[ijkoff].
velem[
D010]
  6807                 + c_dyphi_zpsi * ehbuffer[ijkoff].
velem[
D001]
  6808                 + c_dypsi_zpsi * ehbuffer[ijkoff].
velem[
D011])
  6809               + xpsi[i] * (c_dyphi_zphi * ehbuffer[ijkoff].velem[
D100]
  6810                   + c_dypsi_zphi * ehbuffer[ijkoff].velem[
D110]
  6811                   + c_dyphi_zpsi * ehbuffer[ijkoff].velem[
D101]
  6812                   + c_dypsi_zpsi * ehbuffer[ijkoff].velem[
D111]);
  6813             fz += xphi[i] * (c_yphi_dzphi * ehbuffer[ijkoff].
velem[
D000]
  6814                 + c_ypsi_dzphi * ehbuffer[ijkoff].
velem[
D010]
  6815                 + c_yphi_dzpsi * ehbuffer[ijkoff].
velem[
D001]
  6816                 + c_ypsi_dzpsi * ehbuffer[ijkoff].
velem[
D011])
  6817               + xpsi[i] * (c_yphi_dzphi * ehbuffer[ijkoff].velem[
D100]
  6818                   + c_ypsi_dzphi * ehbuffer[ijkoff].velem[
D110]
  6819                   + c_yphi_dzpsi * ehbuffer[ijkoff].velem[
D101]
  6820                   + c_ypsi_dzpsi * ehbuffer[ijkoff].velem[
D111]);
  6821             e += xphi[i] * (c_yphi_zphi * ehbuffer[ijkoff].
velem[
D000]
  6822                 + c_ypsi_zphi * ehbuffer[ijkoff].
velem[
D010]
  6823                 + c_yphi_zpsi * ehbuffer[ijkoff].
velem[
D001]
  6824                 + c_ypsi_zpsi * ehbuffer[ijkoff].
velem[
D011])
  6825               + xpsi[i] * (c_yphi_zphi * ehbuffer[ijkoff].velem[
D100]
  6826                   + c_ypsi_zphi * ehbuffer[ijkoff].velem[
D110]
  6827                   + c_yphi_zpsi * ehbuffer[ijkoff].velem[
D101]
  6828                   + c_ypsi_zpsi * ehbuffer[ijkoff].velem[
D111]);
  6838       force[n].x -= q * fx;
  6839       force[n].y -= q * fy;
  6840       force[n].z -= q * fz;
  6842       energy_self += q * q;
  6849 #endif // !MSM_COMM_ONLY  6851     stopTime = CkWallTimer();
  6861 #include "ComputeMsmMgr.def.h" 
char msmGridCutoffProxyData[sizeof(CProxy_MsmGridCutoff)]
 
msm::Grid< Float > ehfull
 
Array< Grid< C1Matrix > > gpro_c1hermite
 
Array< PatchDiagram > patchList
 
msm::Grid< C1Vector > ehfull
 
Grid< C1Vector > qh_c1hermite
 
msm::Array< int > blockAssign
 
float calcGcutWork(const msm::BlockSend &bs)
 
float calcBlockWork(const msm::BlockDiagram &b)
 
int registerArray(CkArrayIndex &numElements, CkArrayID aid)
 
std::ostream & iINFO(std::ostream &s)
 
Array< int > recvGridCutoff
 
void setCompute(ComputeMsm *c)
 
int procNum(int, const CkArrayIndex &idx)
 
void sumReducedPotential(CkReductionMsg *msg)
 
#define MSM_MAX_BLOCK_SIZE
 
msm::PatchPtrArray patchPtr
 
Grid< C1Vector > subgrid_c1hermite
 
NAMD_HOST_DEVICE Vector c() const
 
MsmC1HermiteBlock(CkMigrateMessage *m)
 
BlockIndex blockOfGridIndexFold(const Ivec &n, int level) const
 
void put(const msm::Array< CProxy_MsmC1HermiteBlock > &a)
 
void addCharge(GridMsg *)
 
NAMD_HOST_DEVICE int c_p() const
 
Ivec clipIndexToLevel(const Ivec &n, int level) const
 
void setup(MsmGridCutoffInitMsg *bmsg)
 
void get(msm::Array< CProxy_MsmBlock > &a)
 
msm::Grid< Vtype > ehfold
 
BigReal max_a(int pid) const
 
IndexRange clipBlockToIndexRangeFold(const BlockIndex &nb, const IndexRange &nrange) const
 
#define MSM_MAX_BLOCK_VOLUME
 
void interpolationC1Hermite()
 
static PatchMap * Object()
 
virtual void submit(void)=0
 
SimParameters * simParameters
 
void compute_specialized(GridMsg *gmsg)
 
ComputeHomePatchList patchList
 
PatchData(ComputeMsmMgr *pmgr, int pid)
 
void addPotential(GridMsg *)
 
static const int IndexOffset[NUM_APPROX][MAX_NSTENCIL_SKIP_ZERO]
 
void compute(GridMsg *gmsg)
 
std::ostream & endi(std::ostream &s)
 
msm::BlockSend ehblockSend
 
CProxySection_MsmC1HermiteGridCutoff msmGridCutoffBroadcast
 
char msmGridCutoffProxyData[sizeof(CProxy_MsmC1HermiteGridCutoff)]
 
ForceArray & forceArray()
 
void done(int lc[], int n)
 
SubmitReduction * willSubmit(int setID, int size=-1)
 
msm::BlockIndex qhblockIndex
 
void wrapBlockSend(BlockSend &bs) const
 
ResizeArrayIter< T > begin(void) const
 
int index_a(int pid) const
 
static ReductionMgr * Object(void)
 
msm::Grid< Vtype > subgrid
 
void get(msm::Array< CProxy_MsmC1HermiteBlock > &a)
 
void setup_periodic_blocksize(int &bsize, int n)
 
CProxy_ComputeMsmMgr mgrProxy
 
Array< BlockSend > sendUp
 
void extract(Grid< T > &g)
 
#define C1INDEX(drj, dri)
 
void put(const CProxyElement_MsmBlock *q)
 
void compute(GridMsg *gmsg)
 
void addPotential(const Grid< Float > &epart)
 
char msmBlockProxyData[maxlevels *sizeof(CProxy_MsmBlock)]
 
void setbounds(int pia, int pib, int pja, int pjb, int pka, int pkb)
 
NAMD_HOST_DEVICE int b_p() const
 
void wrapBlockIndex(BlockIndex &bn) const
 
void compute(msm::Array< int > &patchIDList)
 
void stencil_1d(Float phi[], Float t)
 
BigReal gridScalingFactor
 
void setupSections(MsmC1HermiteGridCutoffSetupMsg *msg)
 
void set(int pia, int pni, int pja, int pnj, int pka, int pnk)
 
msm::BlockIndex blockIndex
 
void setbounds(int pia, int pib, int pja, int pjb, int pka, int pkb)
 
int gridsize_c(void) const
 
CProxyElement_MsmC1HermiteBlock msmBlockElementProxy
 
Array< Grid< BlockDiagram > > blockLevel
 
void put(const msm::Array< CProxy_MsmBlock > &a)
 
MsmC1HermiteBlock(int level)
 
void recvMsmBlockProxy(MsmBlockProxyMsg *)
 
CProxyElement_MsmBlock msmBlockElementProxy
 
msm::BlockIndex qhBlockIndex
 
void init(const IndexRange &n)
 
int gridsize_a(void) const
 
void addPotential(GridMsg *)
 
void recvMsmC1HermiteBlockProxy(MsmC1HermiteBlockProxyMsg *)
 
static const Float PhiStencil[NUM_APPROX_FORMS][MAX_NSTENCIL_SKIP_ZERO]
 
Array< BlockSend > sendAcross
 
int numPatches(void) const
 
WorkIndex(float w, int i)
 
void prolongationKernel()
 
CProxy_ComputeMsmMgr msmProxy
 
void setupSections(MsmGridCutoffSetupMsg *msg)
 
Array< Grid< C1Matrix > > gres_c1hermite
 
MsmBlockKernel(CkMigrateMessage *m)
 
MsmBlockMap(CkMigrateMessage *m)
 
NAMD_HOST_DEVICE BigReal length(void) const
 
static const int Nstencil[NUM_APPROX]
 
char msmBlockProxyData[maxlevels *sizeof(CProxy_MsmC1HermiteBlock)]
 
BigReal min_c(int pid) const
 
const msm::Grid< Mtype > * pgc
 
void get(CProxyElement_MsmBlock *q)
 
void get(CProxy_MsmC1HermiteGridCutoff *p)
 
int registerArray(CkArrayIndex &numElements, CkArrayID aid)
 
void setup_hgrid_1d(BigReal len, BigReal &hh, int &nn, int &ia, int &ib, int isperiodic)
 
int index_b(int pid) const
 
NAMD_HOST_DEVICE ScaledPosition scale(Position p) const
 
void addCharge(GridMsg *)
 
NAMD_HOST_DEVICE BigReal length2(void) const
 
void setup(MsmGridCutoffInitMsg *bmsg)
 
NAMD_HOST_DEVICE int a_p() const
 
NAMD_HOST_DEVICE Vector a_r() const
 
msm::Grid< Float > subgrid
 
NAMD_HOST_DEVICE Vector b_r() const
 
CProxySection_MsmC1HermiteGridCutoff msmGridCutoffReduction
 
msm::BlockSend ehBlockSend
 
Array< IndexRange > gridrange
 
void put(const msm::Grid< T > &g, int id, int seq)
 
AtomCoordArray & coordArray()
 
void recvMsmGridCutoffProxy(MsmGridCutoffProxyMsg *)
 
Array< PatchSend > sendPatch
 
void NAMD_die(const char *err_msg)
 
int operator<=(const WorkIndex &wn)
 
CProxySection_MsmGridCutoff msmGridCutoffBroadcast
 
MsmGridCutoff(CkMigrateMessage *m)
 
BigReal min_a(int pid) const
 
BlockIndex blockOfGridIndex(const Ivec &n, int level) const
 
NAMD_HOST_DEVICE Vector c_r() const
 
CProxy_MsmGridCutoff msmGridCutoff
 
NAMD_HOST_DEVICE Vector b() const
 
void get(CProxy_MsmGridCutoff *p)
 
const Array< T > & data() const
 
void setupWeights(const msm::Grid< Mtype > *ptrgc, const msm::Grid< Mtype > *ptrgvc)
 
CProxy_MsmC1HermiteGridCutoff msmC1HermiteGridCutoff
 
int index_c(int pid) const
 
int blockFlatIndex(int level, int i, int j, int k)
 
static void ndsplitting(BigReal pg[], BigReal s, int n, int _split)
 
void updateLower(const Ivec &n)
 
Float melem[C1_MATRIX_SIZE]
 
void get(CProxyElement_MsmC1HermiteBlock *q)
 
void done(double tm[], int n)
 
void addPotential(GridMsg *)
 
const msm::Grid< Mtype > * proStencil
 
IndexRange clipBlockToIndexRange(const BlockIndex &nb, const IndexRange &nrange) const
 
BigReal max_b(int pid) const
 
msm::Grid< C1Vector > subgrid_c1hermite
 
void addPotentialC1Hermite(const Grid< C1Vector > &epart)
 
void d_stencil_1d_c1hermite(Float dphi[], Float phi[], Float dpsi[], Float psi[], Float t, Float h, Float h_1)
 
Array< FoldFactor > foldfactor
 
void subtractVirialContrib()
 
void d_stencil_1d(Float dphi[], Float phi[], Float t, Float h_1)
 
msm::Array< CProxy_MsmBlock > msmBlock
 
static const int PolyDegree[NUM_APPROX]
 
void stencil_1d_c1hermite(Float phi[], Float psi[], Float t, Float h)
 
void setup(MsmGridCutoffInitMsg *bmsg)
 
static void splitting(BigReal &g, BigReal &dg, BigReal r_a, int _split)
 
msm::Array< CProxy_MsmC1HermiteBlock > msmC1HermiteBlock
 
void recvMsmC1HermiteGridCutoffProxy(MsmC1HermiteGridCutoffProxyMsg *)
 
Array< int > indexGridCutoff
 
msm::PatchPtrArray & patchPtrArray()
 
static void gc_c1hermite_elem_accum(C1Matrix &matrix, BigReal _c, Vector rv, BigReal _a, int _split)
 
Grid< C1Vector > eh_c1hermite
 
msm::Array< int > gcutAssign
 
BigReal max_c(int pid) const
 
Array< Grid< Float > > gc
 
msm::Grid< Vtype > ehProlongated
 
MsmBlockKernel(const msm::BlockIndex &)
 
int gridsize_b(void) const
 
void wrapBlockSendFold(BlockSend &bs) const
 
MsmBlock(CkMigrateMessage *m)
 
Array< Grid< Float > > gvc
 
void put(const CProxyElement_MsmC1HermiteBlock *q)
 
MsmC1HermiteGridCutoff(CkMigrateMessage *m)
 
CProxySection_MsmGridCutoff msmGridCutoffReduction
 
void put(const CProxy_MsmC1HermiteGridCutoff *p)
 
#define SET_PRIORITY(MSG, SEQ, PRIO)
 
void sendChargeC1Hermite()
 
void get(msm::Grid< T > &g, int &id, int &seq)
 
char msmBlockElementProxyData[sizeof(CProxyElement_MsmBlock)]
 
MsmGridCutoffInitMsg(const msm::BlockIndex &i, const msm::BlockSend &b)
 
void compute(GridMsg *gmsg)
 
NAMD_HOST_DEVICE Vector a() const
 
Array< Grid< C1Matrix > > gc_c1hermite
 
char msmBlockElementProxyData[sizeof(CProxyElement_MsmC1HermiteBlock)]
 
msm::Grid< Vtype > qhRestricted
 
BigReal min_b(int pid) const
 
NAMD_HOST_DEVICE Vector unit(void) const
 
ResizeArrayIter< T > end(void) const
 
const msm::Grid< Mtype > * pgvc
 
void initialize(MsmInitMsg *)
 
Float velem[C1_VECTOR_SIZE]
 
void sumReducedPotential(CkReductionMsg *msg)
 
Array< BlockSend > sendDown
 
void setupStencils(const msm::Grid< Mtype > *res, const msm::Grid< Mtype > *pro)
 
void anterpolationC1Hermite()
 
int procNum(int, const CkArrayIndex &idx)
 
const msm::Grid< Mtype > * resStencil
 
void put(const CProxy_MsmGridCutoff *p)