56   int nlevels = 
msm->nlevels;
    66   if (
msm->report_timings) {
    67     printf(
"MSM anterpolation:  %6.3f sec\n", time_delta);
    71   if (use_nonfactored) {
    72     for (level = 0;  level < nlevels - 1;  level++) {
    78     for (level = 0;  level < nlevels - 1;  level++) {
    85   if (
msm->report_timings) {
    86     printf(
"MSM restriction:    %6.3f sec\n", time_delta);
    90   if (use_cuda_gridcutoff && nlevels > 1) {
    96         printf(
"Falling back on CPU for grid cutoff computation\n");
    97         use_cuda_gridcutoff = 0;
   103       printf(
"Falling back on CPU for grid cutoff computation\n");
   104       use_cuda_gridcutoff = 0;
   110   if ( ! use_cuda_gridcutoff ) {
   111     for (level = 0;  level < nlevels - 1;  level++) {
   122   if (
msm->report_timings) {
   123     printf(
"MSM grid cutoff:    %6.3f sec\n", time_delta);
   127   if (use_nonfactored) {
   128     for (level--;  level >= 0;  level--) {
   134     for (level--;  level >= 0;  level--) {
   141   if (
msm->report_timings) {
   142     printf(
"MSM prolongation:   %6.3f sec\n", time_delta);
   150   if (
msm->report_timings) {
   151     printf(
"MSM interpolation:  %6.3f sec\n", time_delta);
   164   3, 5, 5, 7, 7, 9, 9, 3,
   172   {-1./16, 0, 9./16, 1, 9./16, 0, -1./16},
   175   {3./256, 0, -25./256, 0, 75./128, 1, 75./128, 0, -25./256, 0, 3./256},
   178   {3./256, 0, -25./256, 0, 75./128, 1, 75./128, 0, -25./256, 0, 3./256},
   181   { -5./2048, 0, 49./2048, 0, -245./2048, 0, 1225./2048, 1, 1225./2048,
   182     0, -245./2048, 0, 49./2048, 0, -5./2048 },
   185   { -5./2048, 0, 49./2048, 0, -245./2048, 0, 1225./2048, 1, 1225./2048,
   186     0, -245./2048, 0, 49./2048, 0, -5./2048 },
   189   { 35./65536, 0, -405./65536, 0, 567./16384, 0, -2205./16384, 0,
   190     19845./32768, 1, 19845./32768, 0, -2205./16384, 0, 567./16384, 0,
   191     -405./65536, 0, 35./65536 },
   194   { 35./65536, 0, -405./65536, 0, 567./16384, 0, -2205./16384, 0,
   195     19845./32768, 1, 19845./32768, 0, -2205./16384, 0, 567./16384, 0,
   196     -405./65536, 0, 35./65536 },
   199   { 1./48, 1./6, 23./48, 2./3, 23./48, 1./6, 1./48 },
   209   5, 7, 7, 9, 9, 11, 11, 7
   219   {-5, -3, -1, 0, 1, 3, 5},
   222   {-5, -3, -1, 0, 1, 3, 5},
   225   {-7, -5, -3, -1, 0, 1, 3, 5, 7},
   228   {-7, -5, -3, -1, 0, 1, 3, 5, 7},
   231   {-9, -7, -5, -3, -1, 0, 1, 3, 5, 7, 9},
   234   {-9, -7, -5, -3, -1, 0, 1, 3, 5, 7, 9},
   237   {-3, -2, -1, 0, 1, 2, 3},
   244   {-1./16, 9./16, 1, 9./16, -1./16},
   247   {3./256, -25./256, 75./128, 1, 75./128, -25./256, 3./256},
   250   {3./256, -25./256, 75./128, 1, 75./128, -25./256, 3./256},
   253   { -5./2048, 49./2048, -245./2048, 1225./2048, 1, 1225./2048,
   254     -245./2048, 49./2048, -5./2048 },
   257   { -5./2048, 49./2048, -245./2048, 1225./2048, 1, 1225./2048,
   258     -245./2048, 49./2048, -5./2048 },
   261   { 35./65536, -405./65536, 567./16384, -2205./16384, 
   262     19845./32768, 1, 19845./32768, -2205./16384, 567./16384, 
   263     -405./65536, 35./65536 },
   266   { 35./65536, -405./65536, 567./16384, -2205./16384, 
   267     19845./32768, 1, 19845./32768, -2205./16384, 567./16384, 
   268     -405./65536, 35./65536 },
   271   { 1./48, 1./6, 23./48, 2./3, 23./48, 1./6, 1./48 },
   280 #define STENCIL_1D(_phi, _delta, _approx) \   282     double *phi = _phi; \   285       case NL_MSM_APPROX_CUBIC: \   286         phi[0] = 0.5 * (1 - t) * (2 - t) * (2 - t); \   288         phi[1] = (1 - t) * (1 + t - 1.5 * t * t); \   290         phi[2] = (1 + t) * (1 - t - 1.5 * t * t); \   292         phi[3] = 0.5 * (1 + t) * (2 + t) * (2 + t); \   294       case NL_MSM_APPROX_QUINTIC: \   295         phi[0] = (1./24) * (1-t) * (2-t) * (3-t) * (3-t) * (4-t); \   297         phi[1] = (1-t) * (2-t) * (3-t) * ((1./6) + t * (0.375 - (5./24)*t)); \   299         phi[2] = (1-t*t) * (2-t) * (0.5 + t * (0.25 - (5./12)*t)); \   301         phi[3] = (1-t*t) * (2+t) * (0.5 - t * (0.25 + (5./12)*t)); \   303         phi[4] = (1+t) * (2+t) * (3+t) * ((1./6) - t * (0.375 + (5./24)*t)); \   305         phi[5] = (1./24) * (1+t) * (2+t) * (3+t) * (3+t) * (4+t); \   307       case NL_MSM_APPROX_QUINTIC2: \   308         phi[0] = (1./24) * (3-t) * (3-t) * (3-t) * (t-2) * (5*t-8); \   310         phi[1] = (-1./24) * (2-t) * (t-1) * (-48+t*(153+t*(-114+t*25))); \   312         phi[2] = (1./12) * (1-t) * (12+t*(12+t*(-3+t*(-38+t*25)))); \   314         phi[3] = (1./12) * (1+t) * (12+t*(-12+t*(-3+t*(38+t*25)))); \   316         phi[4] = (-1./24) * (2+t) * (t+1) * (48+t*(153+t*(114+t*25))); \   318         phi[5] = (1./24) * (3+t) * (3+t) * (3+t) * (t+2) * (5*t+8); \   320       case NL_MSM_APPROX_SEPTIC: \   321         phi[0] = (-1./720)*(t-1)*(t-2)*(t-3)*(t-4)*(t-4)*(t-5)*(t-6); \   323         phi[1] = (1./720)*(t-1)*(t-2)*(t-3)*(t-4)*(t-5)*(-6+t*(-20+7*t)); \   325         phi[2] = (-1./240)*(t*t-1)*(t-2)*(t-3)*(t-4)*(-10+t*(-12+7*t)); \   327         phi[3] = (1./144)*(t*t-1)*(t*t-4)*(t-3)*(-12+t*(-4+7*t)); \   329         phi[4] = (-1./144)*(t*t-1)*(t*t-4)*(t+3)*(-12+t*(4+7*t)); \   331         phi[5] = (1./240)*(t*t-1)*(t+2)*(t+3)*(t+4)*(-10+t*(12+7*t)); \   333         phi[6] = (-1./720)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(-6+t*(20+7*t)); \   335         phi[7] = (1./720)*(t+1)*(t+2)*(t+3)*(t+4)*(t+4)*(t+5)*(t+6); \   337       case NL_MSM_APPROX_SEPTIC3: \   338         phi[0] = (3632./5) + t*((-7456./5) + t*((58786./45) + t*(-633 \   339                 + t*((26383./144) + t*((-22807./720) + t*((727./240) \   340                       + t*(-89./720))))))); \   342         phi[1] = -440 + t*((25949./20) + t*((-117131./72) + t*((2247./2) \   343                 + t*((-66437./144) + t*((81109./720) + t*((-727./48) \   344                       + t*(623./720))))))); \   346         phi[2] = (138./5) + t*((-8617./60) + t*((12873./40) + t*((-791./2) \   347                 + t*((4557./16) + t*((-9583./80) + t*((2181./80) \   348                       + t*(-623./240))))))); \   350         phi[3] = 1 + t*t*((-49./36) + t*t*((-959./144) + t*((2569./144) \   351                 + t*((-727./48) + t*(623./144))))); \   353         phi[4] = 1 + t*t*((-49./36) + t*t*((-959./144) + t*((-2569./144) \   354                 + t*((-727./48) + t*(-623./144))))); \   356         phi[5] = (138./5) + t*((8617./60) + t*((12873./40) + t*((791./2) \   357                 + t*((4557./16) + t*((9583./80) + t*((2181./80) \   358                       + t*(623./240))))))); \   360         phi[6] = -440 + t*((-25949./20) + t*((-117131./72) + t*((-2247./2) \   361                 + t*((-66437./144) + t*((-81109./720) + t*((-727./48) \   362                       + t*(-623./720))))))); \   364         phi[7] = (3632./5) + t*((7456./5) + t*((58786./45) + t*(633 \   365                 + t*((26383./144) + t*((22807./720) + t*((727./240) \   366                       + t*(89./720))))))); \   368       case NL_MSM_APPROX_NONIC: \   369         phi[0] = (-1./40320)*(t-8)*(t-7)*(t-6)*(t-5)*(t-5)*(t-4)*(t-3)* \   372         phi[1] = (1./40320)*(t-7)*(t-6)*(t-5)*(t-4)*(t-3)*(t-2)*(t-1)* \   375         phi[2] = (-1./10080)*(t-6)*(t-5)*(t-4)*(t-3)*(t-2)*(t-1)*(t+1)* \   378         phi[3] = (1./1440)*(t-5)*(t-4)*(t-3)*(t-2)*(t-1)*(t+1)*(t+2)* \   381         phi[4] = (-1./2880)*(t-4)*(t-3)*(t-2)*(t-1)*(t+1)*(t+2)*(t+3)* \   384         phi[5] = (1./2880)*(t-3)*(t-2)*(t-1)*(t+1)*(t+2)*(t+3)*(t+4)* \   387         phi[6] = (-1./1440)*(t-2)*(t-1)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)* \   390         phi[7] = (1./10080)*(t-1)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(t+6)* \   393         phi[8] = (-1./40320)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(t+6)*(t+7)* \   396         phi[9] = (1./40320)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(t+5)*(t+6)* \   399       case NL_MSM_APPROX_NONIC4: \   400         phi[0] = 439375./7+t*(-64188125./504+t*(231125375./2016 \   401               +t*(-17306975./288+t*(7761805./384+t*(-2895587./640 \   402                     +t*(129391./192+t*(-259715./4032+t*(28909./8064 \   403                           +t*(-3569./40320))))))))); \   405         phi[1] = -56375+t*(8314091./56+t*(-49901303./288+t*(3763529./32 \   406                 +t*(-19648027./384+t*(9469163./640+t*(-545977./192 \   407                       +t*(156927./448+t*(-28909./1152 \   408                           +t*(3569./4480))))))))); \   410         phi[2] = 68776./7+t*(-1038011./28+t*(31157515./504+t*(-956669./16 \   411                 +t*(3548009./96+t*(-2422263./160+t*(197255./48 \   412                       +t*(-19959./28+t*(144545./2016 \   413                           +t*(-3569./1120))))))))); \   415         phi[3] = -154+t*(12757./12+t*(-230123./72+t*(264481./48 \   416                 +t*(-576499./96+t*(686147./160+t*(-96277./48 \   417                       +t*(14221./24+t*(-28909./288+t*(3569./480))))))))); \   419         phi[4] = 1+t*t*(-205./144+t*t*(91./192+t*(-6181./320 \   420                 +t*(6337./96+t*(-2745./32+t*(28909./576 \   421                       +t*(-3569./320))))))); \   423         phi[5] = 1+t*t*(-205./144+t*t*(91./192+t*(6181./320 \   424                 +t*(6337./96+t*(2745./32+t*(28909./576 \   425                       +t*(3569./320))))))); \   427         phi[6] = -154+t*(-12757./12+t*(-230123./72+t*(-264481./48 \   428                 +t*(-576499./96+t*(-686147./160+t*(-96277./48 \   429                       +t*(-14221./24+t*(-28909./288+t*(-3569./480))))))))); \   431         phi[7] = 68776./7+t*(1038011./28+t*(31157515./504+t*(956669./16 \   432                 +t*(3548009./96+t*(2422263./160+t*(197255./48 \   433                       +t*(19959./28+t*(144545./2016+t*(3569./1120))))))))); \   435         phi[8] = -56375+t*(-8314091./56+t*(-49901303./288+t*(-3763529./32 \   436                 +t*(-19648027./384+t*(-9469163./640+t*(-545977./192 \   437                       +t*(-156927./448+t*(-28909./1152 \   438                           +t*(-3569./4480))))))))); \   440         phi[9] = 439375./7+t*(64188125./504+t*(231125375./2016 \   441               +t*(17306975./288+t*(7761805./384+t*(2895587./640 \   442                     +t*(129391./192+t*(259715./4032+t*(28909./8064 \   443                           +t*(3569./40320))))))))); \   445       case NL_MSM_APPROX_BSPLINE: \   446         phi[0] = (1./6) * (2-t) * (2-t) * (2-t); \   448         phi[1] = (2./3) + t*t*(-1 + 0.5*t); \   450         phi[2] = (2./3) + t*t*(-1 - 0.5*t); \   452         phi[3] = (1./6) * (2+t) * (2+t) * (2+t); \   455         return NL_MSM_ERROR_SUPPORT; \   468 #define D_STENCIL_1D(_dphi, _phi, _h_1, _delta, _approx) \   470     double *dphi = _dphi; \   471     double *phi = _phi; \   475       case NL_MSM_APPROX_CUBIC: \   476         phi[0] = 0.5 * (1 - t) * (2 - t) * (2 - t); \   477         dphi[0] = (1.5 * t - 2) * (2 - t) * h_1; \   479         phi[1] = (1 - t) * (1 + t - 1.5 * t * t); \   480         dphi[1] = (-5 + 4.5 * t) * t * h_1; \   482         phi[2] = (1 + t) * (1 - t - 1.5 * t * t); \   483         dphi[2] = (-5 - 4.5 * t) * t * h_1; \   485         phi[3] = 0.5 * (1 + t) * (2 + t) * (2 + t); \   486         dphi[3] = (1.5 * t + 2) * (2 + t) * h_1; \   488       case NL_MSM_APPROX_QUINTIC: \   489         phi[0] = (1./24) * (1-t) * (2-t) * (3-t) * (3-t) * (4-t); \   490         dphi[0] = ((-1./24) * ((3-t) * (3-t) * (14 + t * (-14 + 3*t)) \   491               + 2 * (1-t) * (2-t) * (3-t) * (4-t))) * h_1; \   493         phi[1] = (1-t) * (2-t) * (3-t) * ((1./6) + t * (0.375 - (5./24)*t)); \   494         dphi[1] = (-((1./6) + t * (0.375 - (5./24)*t)) * (11 + t * (-12 + 3*t))\   495             + (1-t) * (2-t) * (3-t) * (0.375 - (5./12)*t)) * h_1; \   497         phi[2] = (1-t*t) * (2-t) * (0.5 + t * (0.25 - (5./12)*t)); \   498         dphi[2] = (-(0.5 + t * (0.25 - (5./12)*t)) * (1 + t * (4 - 3*t)) \   499             + (1-t*t) * (2-t) * (0.25 - (5./6)*t)) * h_1; \   501         phi[3] = (1-t*t) * (2+t) * (0.5 - t * (0.25 + (5./12)*t)); \   502         dphi[3] = ((0.5 + t * (-0.25 - (5./12)*t)) * (1 + t * (-4 - 3*t)) \   503             - (1-t*t) * (2+t) * (0.25 + (5./6)*t)) * h_1; \   505         phi[4] = (1+t) * (2+t) * (3+t) * ((1./6) - t * (0.375 + (5./24)*t)); \   506         dphi[4] = (((1./6) + t * (-0.375 - (5./24)*t)) * (11 + t * (12 + 3*t)) \   507             - (1+t) * (2+t) * (3+t) * (0.375 + (5./12)*t)) * h_1; \   509         phi[5] = (1./24) * (1+t) * (2+t) * (3+t) * (3+t) * (4+t); \   510         dphi[5] = ((1./24) * ((3+t) * (3+t) * (14 + t * (14 + 3*t)) \   511               + 2 * (1+t) * (2+t) * (3+t) * (4+t))) * h_1; \   513       case NL_MSM_APPROX_QUINTIC2: \   514         phi[0] = (1./24) * (3-t) * (3-t) * (3-t) * (t-2) * (5*t-8); \   515         dphi[0] = ((1./24) * (3-t) * (3-t) * ((3-t)*(5*t-8) - 3*(t-2)*(5*t-8) \   516               + 5*(t-2)*(3-t))) * h_1; \   518         phi[1] = (-1./24) * (2-t) * (t-1) * (-48+t*(153+t*(-114+t*25))); \   519         dphi[1] = ((-1./24) * ((2-t)*(-48+t*(153+t*(-114+t*25))) - (t-1)* \   520               (-48+t*(153+t*(-114+t*25))) \   521               + (2-t)*(t-1)*(153+t*(-228+t*75)))) * h_1; \   523         phi[2] = (1./12) * (1-t) * (12+t*(12+t*(-3+t*(-38+t*25)))); \   524         dphi[2] = ((1./12) * (-(12+t*(12+t*(-3+t*(-38+t*25)))) \   525               + (1-t)*(12+t*(-6+t*(-114+t*100))))) * h_1; \   527         phi[3] = (1./12) * (1+t) * (12+t*(-12+t*(-3+t*(38+t*25)))); \   528         dphi[3] = ((1./12) * ((12+t*(-12+t*(-3+t*(38+t*25)))) \   529               + (1+t)*(-12+t*(-6+t*(114+t*100))))) * h_1; \   531         phi[4] = (-1./24) * (2+t) * (t+1) * (48+t*(153+t*(114+t*25))); \   532         dphi[4] = ((-1./24) * ((2+t)*(48+t*(153+t*(114+t*25))) + (t+1)* \   533               (48+t*(153+t*(114+t*25))) \   534               + (2+t)*(t+1)*(153+t*(228+t*75)))) * h_1; \   536         phi[5] = (1./24) * (3+t) * (3+t) * (3+t) * (t+2) * (5*t+8); \   537         dphi[5] = ((1./24) * (3+t) * (3+t) * ((3+t)*(5*t+8) + 3*(t+2)*(5*t+8) \   538               + 5*(t+2)*(3+t))) * h_1; \   540       case NL_MSM_APPROX_SEPTIC: \   541         phi[0] = (-1./720)*(t-1)*(t-2)*(t-3)*(t-4)*(t-4)*(t-5)*(t-6); \   542         dphi[0] = (-1./720)*(t-4)*(-1944+t*(3644+t*(-2512+t*(807 \   543                   +t*(-122+t*7))))) * h_1; \   545         phi[1] = (1./720)*(t-1)*(t-2)*(t-3)*(t-4)*(t-5)*(-6+t*(-20+7*t)); \   546         dphi[1] = (1./720)*(756+t*(-9940+t*(17724+t*(-12740+t*(4445 \   547                     +t*(-750+t*49)))))) * h_1; \   549         phi[2] = (-1./240)*(t*t-1)*(t-2)*(t-3)*(t-4)*(-10+t*(-12+7*t)); \   550         dphi[2] = (-1./240)*(-28+t*(1260+t*(-756+t*(-1260+t*(1365 \   551                     +t*(-450+t*49)))))) * h_1; \   553         phi[3] = (1./144)*(t*t-1)*(t*t-4)*(t-3)*(-12+t*(-4+7*t)); \   554         dphi[3] = (1./144)*t*(-560+t*(84+t*(644+t*(-175+t*(-150+t*49))))) * \   557         phi[4] = (-1./144)*(t*t-1)*(t*t-4)*(t+3)*(-12+t*(4+7*t)); \   558         dphi[4] = (-1./144)*t*(560+t*(84+t*(-644+t*(-175+t*(150+t*49))))) * \   561         phi[5] = (1./240)*(t*t-1)*(t+2)*(t+3)*(t+4)*(-10+t*(12+7*t)); \   562         dphi[5] = (1./240)*(-28+t*(-1260+t*(-756+t*(1260+t*(1365 \   563                     +t*(450+t*49)))))) * h_1; \   565         phi[6] = (-1./720)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(-6+t*(20+7*t)); \   566         dphi[6] = (-1./720)*(756+t*(9940+t*(17724+t*(12740+t*(4445 \   567                     +t*(750+t*49)))))) * h_1; \   569         phi[7] = (1./720)*(t+1)*(t+2)*(t+3)*(t+4)*(t+4)*(t+5)*(t+6); \   570         dphi[7] = (1./720)*(t+4)*(1944+t*(3644+t*(2512+t*(807 \   571                   +t*(122+t*7))))) * h_1; \   573       case NL_MSM_APPROX_SEPTIC3: \   574         phi[0] = (3632./5) + t*((-7456./5) + t*((58786./45) + t*(-633 \   575                 + t*((26383./144) + t*((-22807./720) + t*((727./240) \   576                       + t*(-89./720))))))); \   577         dphi[0] = ((-7456./5) + t*((117572./45) + t*(-1899 + t*((26383./36) \   578                   + t*((-22807./144) + t*((727./40) + t*(-623./720)))))))*h_1; \   580         phi[1] = -440 + t*((25949./20) + t*((-117131./72) + t*((2247./2) \   581                 + t*((-66437./144) + t*((81109./720) + t*((-727./48) \   582                       + t*(623./720))))))); \   583         dphi[1] = ((25949./20) + t*((-117131./36) + t*((6741./2) \   584                 + t*((-66437./36) + t*((81109./144) + t*((-727./8) \   585                       + t*(4361./720))))))) * h_1; \   587         phi[2] = (138./5) + t*((-8617./60) + t*((12873./40) + t*((-791./2) \   588                 + t*((4557./16) + t*((-9583./80) + t*((2181./80) \   589                       + t*(-623./240))))))); \   590         dphi[2] = ((-8617./60) + t*((12873./20) + t*((-2373./2) + t*((4557./4) \   591                   + t*((-9583./16) + t*((6543./40) + t*(-4361./240)))))))*h_1; \   593         phi[3] = 1 + t*t*((-49./36) + t*t*((-959./144) + t*((2569./144) \   594                 + t*((-727./48) + t*(623./144))))); \   595         dphi[3] = (t*((-49./18) + t*t*((-959./36) + t*((12845./144) \   596                   + t*((-727./8) + t*(4361./144)))))) * h_1; \   598         phi[4] = 1 + t*t*((-49./36) + t*t*((-959./144) + t*((-2569./144) \   599                 + t*((-727./48) + t*(-623./144))))); \   600         dphi[4] = (t*((-49./18) + t*t*((-959./36) + t*((-12845./144) \   601                   + t*((-727./8) + t*(-4361./144)))))) * h_1; \   603         phi[5] = (138./5) + t*((8617./60) + t*((12873./40) + t*((791./2) \   604                 + t*((4557./16) + t*((9583./80) + t*((2181./80) \   605                       + t*(623./240))))))); \   606         dphi[5] = ((8617./60) + t*((12873./20) + t*((2373./2) + t*((4557./4) \   607                   + t*((9583./16) + t*((6543./40) + t*(4361./240)))))))*h_1; \   609         phi[6] = -440 + t*((-25949./20) + t*((-117131./72) + t*((-2247./2) \   610                 + t*((-66437./144) + t*((-81109./720) + t*((-727./48) \   611                       + t*(-623./720))))))); \   612         dphi[6] = ((-25949./20) + t*((-117131./36) + t*((-6741./2) \   613                 + t*((-66437./36) + t*((-81109./144) + t*((-727./8) \   614                       + t*(-4361./720))))))) * h_1; \   616         phi[7] = (3632./5) + t*((7456./5) + t*((58786./45) + t*(633 \   617                 + t*((26383./144) + t*((22807./720) + t*((727./240) \   618                       + t*(89./720))))))); \   619         dphi[7] = ((7456./5) + t*((117572./45) + t*(1899 + t*((26383./36) \   620                   + t*((22807./144) + t*((727./40) + t*(623./720)))))))*h_1; \   622       case NL_MSM_APPROX_NONIC: \   623         phi[0] = (-1./40320)*(t-8)*(t-7)*(t-6)*(t-5)*(t-5)*(t-4)*(t-3)* \   625         dphi[0] = (-1./40320)*(t-5)*(-117648+t*(256552+t*(-221416 \   626                 +t*(99340+t*(-25261+t*(3667+t*(-283+t*9)))))))*h_1; \   628         phi[1] = (1./40320)*(t-7)*(t-6)*(t-5)*(t-4)*(t-3)*(t-2)*(t-1)* \   630         dphi[1] = (1./40320)*(71856+t*(-795368+t*(1569240+t*(-1357692 \   631                   +t*(634725+t*(-172116+t*(27090+t*(-2296+t*81))))))))*h_1; \   633         phi[2] = (-1./10080)*(t-6)*(t-5)*(t-4)*(t-3)*(t-2)*(t-1)*(t+1)* \   635         dphi[2] = (1./10080)*(3384+t*(-69080+t*(55026 \   636                 +t*(62580+t*(-99225+t*(51660+t*(-13104+t*(1640 \   637                           +t*(-81)))))))))*h_1; \   639         phi[3] = (1./1440)*(t-5)*(t-4)*(t-3)*(t-2)*(t-1)*(t+1)*(t+2)* \   641         dphi[3] = (1./1440)*(72+t*(-6344+t*(2070 \   642                 +t*(7644+t*(-4725+t*(-828+t*(1260+t*(-328+t*27))))))))*h_1; \   644         phi[4] = (-1./2880)*(t-4)*(t-3)*(t-2)*(t-1)*(t+1)*(t+2)*(t+3)* \   646         dphi[4] = (-1./2880)*t*(10792+t*(-972+t*(-12516 \   647                 +t*(2205+t*(3924+t*(-882+t*(-328+t*81)))))))*h_1; \   649         phi[5] = (1./2880)*(t-3)*(t-2)*(t-1)*(t+1)*(t+2)*(t+3)*(t+4)* \   651         dphi[5] = (1./2880)*t*(-10792+t*(-972+t*(12516 \   652                 +t*(2205+t*(-3924+t*(-882+t*(328+t*81)))))))*h_1; \   654         phi[6] = (-1./1440)*(t-2)*(t-1)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)* \   656         dphi[6] = (1./1440)*(-72+t*(-6344+t*(-2070 \   657                 +t*(7644+t*(4725+t*(-828+t*(-1260+t*(-328+t*(-27)))))))))*h_1; \   659         phi[7] = (1./10080)*(t-1)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(t+6)* \   661         dphi[7] = (1./10080)*(-3384+t*(-69080+t*(-55026 \   662                 +t*(62580+t*(99225+t*(51660+t*(13104+t*(1640+t*81))))))))*h_1; \   664         phi[8] = (-1./40320)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(t+6)*(t+7)* \   666         dphi[8] = (-1./40320)*(71856+t*(795368+t*(1569240 \   667                 +t*(1357692+t*(634725+t*(172116+t*(27090+t*(2296 \   670         phi[9] = (1./40320)*(t+1)*(t+2)*(t+3)*(t+4)*(t+5)*(t+5)*(t+6)* \   672         dphi[9] = (1./40320)*(t+5)*(117648+t*(256552+t*(221416 \   673                 +t*(99340+t*(25261+t*(3667+t*(283+t*9)))))))*h_1; \   675       case NL_MSM_APPROX_NONIC4: \   676         phi[0] = 439375./7+t*(-64188125./504+t*(231125375./2016 \   677               +t*(-17306975./288+t*(7761805./384+t*(-2895587./640 \   678                     +t*(129391./192+t*(-259715./4032+t*(28909./8064 \   679                           +t*(-3569./40320))))))))); \   680         dphi[0] = (-64188125./504+t*(231125375./1008 \   681               +t*(-17306975./96+t*(7761805./96+t*(-2895587./128 \   682                     +t*(129391./32+t*(-259715./576+t*(28909./1008 \   683                           +t*(-3569./4480)))))))))*h_1; \   685         phi[1] = -56375+t*(8314091./56+t*(-49901303./288+t*(3763529./32 \   686                 +t*(-19648027./384+t*(9469163./640+t*(-545977./192 \   687                       +t*(156927./448+t*(-28909./1152 \   688                           +t*(3569./4480))))))))); \   689         dphi[1] = (8314091./56+t*(-49901303./144+t*(11290587./32 \   690                 +t*(-19648027./96+t*(9469163./128+t*(-545977./32 \   691                       +t*(156927./64+t*(-28909./144 \   692                           +t*(32121./4480)))))))))*h_1; \   694         phi[2] = 68776./7+t*(-1038011./28+t*(31157515./504+t*(-956669./16 \   695                 +t*(3548009./96+t*(-2422263./160+t*(197255./48 \   696                       +t*(-19959./28+t*(144545./2016 \   697                           +t*(-3569./1120))))))))); \   698         dphi[2] = (-1038011./28+t*(31157515./252+t*(-2870007./16 \   699                 +t*(3548009./24+t*(-2422263./32+t*(197255./8 \   700                       +t*(-19959./4+t*(144545./252 \   701                           +t*(-32121./1120)))))))))*h_1; \   703         phi[3] = -154+t*(12757./12+t*(-230123./72+t*(264481./48 \   704                 +t*(-576499./96+t*(686147./160+t*(-96277./48 \   705                       +t*(14221./24+t*(-28909./288+t*(3569./480))))))))); \   706         dphi[3] = (12757./12+t*(-230123./36+t*(264481./16 \   707                 +t*(-576499./24+t*(686147./32+t*(-96277./8 \   708                       +t*(99547./24+t*(-28909./36 \   709                           +t*(10707./160)))))))))*h_1; \   711         phi[4] = 1+t*t*(-205./144+t*t*(91./192+t*(-6181./320 \   712                 +t*(6337./96+t*(-2745./32+t*(28909./576 \   713                       +t*(-3569./320))))))); \   714         dphi[4] = t*(-205./72+t*t*(91./48+t*(-6181./64 \   715                 +t*(6337./16+t*(-19215./32+t*(28909./72 \   716                       +t*(-32121./320)))))))*h_1; \   718         phi[5] = 1+t*t*(-205./144+t*t*(91./192+t*(6181./320 \   719                 +t*(6337./96+t*(2745./32+t*(28909./576 \   720                       +t*(3569./320))))))); \   721         dphi[5] = t*(-205./72+t*t*(91./48+t*(6181./64 \   722                 +t*(6337./16+t*(19215./32+t*(28909./72 \   723                       +t*(32121./320)))))))*h_1; \   725         phi[6] = -154+t*(-12757./12+t*(-230123./72+t*(-264481./48 \   726                 +t*(-576499./96+t*(-686147./160+t*(-96277./48 \   727                       +t*(-14221./24+t*(-28909./288+t*(-3569./480))))))))); \   728         dphi[6] = (-12757./12+t*(-230123./36+t*(-264481./16 \   729                 +t*(-576499./24+t*(-686147./32+t*(-96277./8 \   730                       +t*(-99547./24+t*(-28909./36 \   731                           +t*(-10707./160)))))))))*h_1; \   733         phi[7] = 68776./7+t*(1038011./28+t*(31157515./504+t*(956669./16 \   734                 +t*(3548009./96+t*(2422263./160+t*(197255./48 \   735                       +t*(19959./28+t*(144545./2016+t*(3569./1120))))))))); \   736         dphi[7] = (1038011./28+t*(31157515./252+t*(2870007./16 \   737                 +t*(3548009./24+t*(2422263./32+t*(197255./8 \   738                       +t*(19959./4+t*(144545./252 \   739                           +t*(32121./1120)))))))))*h_1; \   741         phi[8] = -56375+t*(-8314091./56+t*(-49901303./288+t*(-3763529./32 \   742                 +t*(-19648027./384+t*(-9469163./640+t*(-545977./192 \   743                       +t*(-156927./448+t*(-28909./1152 \   744                           +t*(-3569./4480))))))))); \   745         dphi[8] = (-8314091./56+t*(-49901303./144+t*(-11290587./32 \   746                 +t*(-19648027./96+t*(-9469163./128+t*(-545977./32 \   747                       +t*(-156927./64+t*(-28909./144 \   748                           +t*(-32121./4480)))))))))*h_1; \   750         phi[9] = 439375./7+t*(64188125./504+t*(231125375./2016 \   751               +t*(17306975./288+t*(7761805./384+t*(2895587./640 \   752                     +t*(129391./192+t*(259715./4032+t*(28909./8064 \   753                           +t*(3569./40320))))))))); \   754         dphi[9] = (64188125./504+t*(231125375./1008 \   755               +t*(17306975./96+t*(7761805./96+t*(2895587./128 \   756                     +t*(129391./32+t*(259715./576+t*(28909./1008 \   757                           +t*(3569./4480)))))))))*h_1; \   759       case NL_MSM_APPROX_BSPLINE: \   760         phi[0] = (1./6) * (2-t) * (2-t) * (2-t); \   761         dphi[0] = -0.5 * (2-t) * (2-t) * h_1; \   763         phi[1] = (2./3) + t*t*(-1 + 0.5*t); \   764         dphi[1] = t*(-2 + 1.5*t) * h_1; \   766         phi[2] = (2./3) + t*t*(-1 - 0.5*t); \   767         dphi[2] = t*(-2 - 1.5*t) * h_1; \   769         phi[3] = (1./6) * (2+t) * (2+t) * (2+t); \   770         dphi[3] = 0.5 * (2+t) * (2+t) * h_1; \   773         return NL_MSM_ERROR_SUPPORT; \   781   const double *atom = 
msm->atom;
   782   const int natoms = 
msm->numatoms;
   787   double rx_hx, ry_hy, rz_hz;      
   788 #ifndef TEST_INLINING   794   const double hx_1 = 1/
msm->hx;
   795   const double hy_1 = 1/
msm->hy;
   796   const double hz_1 = 1/
msm->hz;
   797   const double xm0 = 
msm->gx;      
   798   const double ym0 = 
msm->gy;      
   799   const double zm0 = 
msm->gz;      
   802   NL_Msmgrid_double *qhgrid = &(
msm->qh[0]);
   803   double *qh = qhgrid->data;
   804   const int ni = qhgrid->ni;
   805   const int nj = qhgrid->nj;
   806   const int nk = qhgrid->nk;
   807   const int ia = qhgrid->i0;
   808   const int ib = ia + ni - 1;
   809   const int ja = qhgrid->j0;
   810   const int jb = ja + nj - 1;
   811   const int ka = qhgrid->k0;
   812   const int kb = ka + nk - 1;
   819   int n, i, j, k, ilo, jlo, klo, koff;
   822   const int approx = 
msm->approx;
   824   const int s_edge = (
PolyDegree[approx] - 1) >> 1;  
   828   for (n = 0;  n < natoms;  n++) {
   835     rx_hx = (atom[4*n    ] - xm0) * hx_1;
   836     ry_hy = (atom[4*n + 1] - ym0) * hy_1;
   837     rz_hz = (atom[4*n + 2] - zm0) * hz_1;
   840     ilo = (int) floor(rx_hx) - s_edge;
   841     jlo = (int) floor(ry_hy) - s_edge;
   842     klo = (int) floor(rz_hz) - s_edge;
   845 #ifndef TEST_INLINING   846     delta = rx_hx - (double) ilo;
   848     delta = ry_hy - (double) jlo;
   850     delta = rz_hz - (double) klo;
   853     t = rx_hx - (double) ilo;
   854         xphi[0] = 0.5 * (1 - t) * (2 - t) * (2 - t); \
   856         xphi[1] = (1 - t) * (1 + t - 1.5 * t * t); \
   858         xphi[2] = (1 + t) * (1 - t - 1.5 * t * t); \
   860         xphi[3] = 0.5 * (1 + t) * (2 + t) * (2 + t); \
   862     t = ry_hy - (double) jlo;
   863         yphi[0] = 0.5 * (1 - t) * (2 - t) * (2 - t); \
   865         yphi[1] = (1 - t) * (1 + t - 1.5 * t * t); \
   867         yphi[2] = (1 + t) * (1 - t - 1.5 * t * t); \
   869         yphi[3] = 0.5 * (1 + t) * (2 + t) * (2 + t); \
   871     t = rz_hz - (double) klo;
   872         zphi[0] = 0.5 * (1 - t) * (2 - t) * (2 - t); \
   874         zphi[1] = (1 - t) * (1 + t - 1.5 * t * t); \
   876         zphi[2] = (1 + t) * (1 - t - 1.5 * t * t); \
   878         zphi[3] = 0.5 * (1 + t) * (2 + t) * (2 + t); \
   883     iswithin = ( ia <= ilo && (ilo+(s_size-1)) <= ib &&
   884                  ja <= jlo && (jlo+(s_size-1)) <= jb &&
   885                  ka <= klo && (klo+(s_size-1)) <= kb );
   890       for (k = 0;  k < s_size;  k++) {
   891         koff = (k + klo) * nj;
   893         for (j = 0;  j < s_size;  j++) {
   894           jkoff = (koff + (j + jlo)) * ni;
   896           for (i = 0;  i < s_size;  i++) {
   897             index = jkoff + (i + ilo);
   900             qh[index] += xphi[i] * cjk;
   911         if      (ilo < ia) 
do { ilo += ni; } 
while (ilo < ia);
   912         else if (ilo > ib) 
do { ilo -= ni; } 
while (ilo > ib);
   914       else if (ilo < ia || (ilo+(s_size-1)) > ib) {
   919         if      (jlo < ja) 
do { jlo += nj; } 
while (jlo < ja);
   920         else if (jlo > jb) 
do { jlo -= nj; } 
while (jlo > jb);
   922       else if (jlo < ja || (jlo+(s_size-1)) > jb) {
   927         if      (klo < ka) 
do { klo += nk; } 
while (klo < ka);
   928         else if (klo > kb) 
do { klo -= nk; } 
while (klo > kb);
   930       else if (klo < ka || (klo+(s_size-1)) > kb) {
   935       for (k = 0, kp = klo;  k < s_size;  k++, kp++) {
   936         if (kp > kb) kp = ka;  
   939         for (j = 0, jp = jlo;  j < s_size;  j++, jp++) {
   940           if (jp > jb) jp = ja;  
   941           jkoff = (koff + jp) * ni;
   943           for (i = 0, ip = ilo;  i < s_size;  i++, ip++) {
   944             if (ip > ib) ip = ia;  
   948             qh[index] += xphi[i] * cjk;
   962   double *f = 
msm->felec;
   963   const double *atom = 
msm->atom;
   964   const int natoms = 
msm->numatoms;
   972   double rx_hx, ry_hy, rz_hz;      
   973 #ifndef TEST_INLINING   978   const double hx_1 = 1/
msm->hx;
   979   const double hy_1 = 1/
msm->hy;
   980   const double hz_1 = 1/
msm->hz;
   981   const double xm0 = 
msm->gx;      
   982   const double ym0 = 
msm->gy;      
   983   const double zm0 = 
msm->gz;      
   987   const NL_Msmgrid_double *ehgrid = &(
msm->eh[0]);
   988   const double *eh = ehgrid->data;
   989   const double *ebuf = ehgrid->buffer;
   990   const NL_Msmgrid_double *qhgrid = &(
msm->qh[0]);
   991   const double *qbuf = qhgrid->buffer;
   992   const int ni = ehgrid->ni;
   993   const int nj = ehgrid->nj;
   994   const int nk = ehgrid->nk;
   995   const int ia = ehgrid->i0;
   996   const int ib = ia + ni - 1;
   997   const int ja = ehgrid->j0;
   998   const int jb = ja + nj - 1;
   999   const int ka = ehgrid->k0;
  1000   const int kb = ka + nk - 1;
  1007   double fx, fy, fz, cx, cy, cz;
  1009   int n, i, j, k, ilo, jlo, klo, koff;
  1011   const int nn = (ni*nj) * nk;
  1013   const int approx = 
msm->approx;
  1015   const int s_edge = (
PolyDegree[approx] - 1) >> 1;  
  1017   for (n = 0;  n < natoms;  n++) {
  1024     rx_hx = (atom[4*n    ] - xm0) * hx_1;
  1025     ry_hy = (atom[4*n + 1] - ym0) * hy_1;
  1026     rz_hz = (atom[4*n + 2] - zm0) * hz_1;
  1029     ilo = (int) floor(rx_hx) - s_edge;
  1030     jlo = (int) floor(ry_hy) - s_edge;
  1031     klo = (int) floor(rz_hz) - s_edge;
  1034 #ifndef TEST_INLINING  1035     delta = rx_hx - (double) ilo;
  1038     delta = ry_hy - (double) jlo;
  1041     delta = rz_hz - (double) klo;
  1045     t = rx_hx - (double) ilo;
  1046         xphi[0] = 0.5 * (1 - t) * (2 - t) * (2 - t); \
  1047         dxphi[0] = (1.5 * t - 2) * (2 - t) * hx_1; \
  1049         xphi[1] = (1 - t) * (1 + t - 1.5 * t * t); \
  1050         dxphi[1] = (-5 + 4.5 * t) * t * hx_1; \
  1052         xphi[2] = (1 + t) * (1 - t - 1.5 * t * t); \
  1053         dxphi[2] = (-5 - 4.5 * t) * t * hx_1; \
  1055         xphi[3] = 0.5 * (1 + t) * (2 + t) * (2 + t); \
  1056         dxphi[3] = (1.5 * t + 2) * (2 + t) * hx_1; \
  1058     t = ry_hy - (double) jlo;
  1059         yphi[0] = 0.5 * (1 - t) * (2 - t) * (2 - t); \
  1060         dyphi[0] = (1.5 * t - 2) * (2 - t) * hy_1; \
  1062         yphi[1] = (1 - t) * (1 + t - 1.5 * t * t); \
  1063         dyphi[1] = (-5 + 4.5 * t) * t * hy_1; \
  1065         yphi[2] = (1 + t) * (1 - t - 1.5 * t * t); \
  1066         dyphi[2] = (-5 - 4.5 * t) * t * hy_1; \
  1068         yphi[3] = 0.5 * (1 + t) * (2 + t) * (2 + t); \
  1069         dyphi[3] = (1.5 * t + 2) * (2 + t) * hy_1; \
  1071     t = rz_hz - (double) klo;
  1072         zphi[0] = 0.5 * (1 - t) * (2 - t) * (2 - t); \
  1073         dzphi[0] = (1.5 * t - 2) * (2 - t) * hz_1; \
  1075         zphi[1] = (1 - t) * (1 + t - 1.5 * t * t); \
  1076         dzphi[1] = (-5 + 4.5 * t) * t * hz_1; \
  1078         zphi[2] = (1 + t) * (1 - t - 1.5 * t * t); \
  1079         dzphi[2] = (-5 - 4.5 * t) * t * hz_1; \
  1081         zphi[3] = 0.5 * (1 + t) * (2 + t) * (2 + t); \
  1082         dzphi[3] = (1.5 * t + 2) * (2 + t) * hz_1; \
  1087     iswithin = ( ia <= ilo && (ilo+(s_size-1)) <= ib &&
  1088                  ja <= jlo && (jlo+(s_size-1)) <= jb &&
  1089                  ka <= klo && (klo+(s_size-1)) <= kb );
  1095       for (k = 0;  k < s_size;  k++) {
  1096         koff = (k + klo) * nj;
  1097         for (j = 0;  j < s_size;  j++) {
  1098           jkoff = (koff + (j + jlo)) * ni;
  1099           cx = yphi[j] * zphi[k];
  1100           cy = dyphi[j] * zphi[k];
  1101           cz = yphi[j] * dzphi[k];
  1102           for (i = 0;  i < s_size;  i++) {
  1103             index = jkoff + (i + ilo);
  1106             fx += eh[index] * dxphi[i] * cx;
  1107             fy += eh[index] * xphi[i] * cy;
  1108             fz += eh[index] * xphi[i] * cz;
  1119         if      (ilo < ia) 
do { ilo += ni; } 
while (ilo < ia);
  1120         else if (ilo > ib) 
do { ilo -= ni; } 
while (ilo > ib);
  1122       else if (ilo < ia || (ilo+(s_size-1)) > ib) {
  1127         if      (jlo < ja) 
do { jlo += nj; } 
while (jlo < ja);
  1128         else if (jlo > jb) 
do { jlo -= nj; } 
while (jlo > jb);
  1130       else if (jlo < ja || (jlo+(s_size-1)) > jb) {
  1135         if      (klo < ka) 
do { klo += nk; } 
while (klo < ka);
  1136         else if (klo > kb) 
do { klo -= nk; } 
while (klo > kb);
  1138       else if (klo < ka || (klo+(s_size-1)) > kb) {
  1144       for (k = 0, kp = klo;  k < s_size;  k++, kp++) {
  1145         if (kp > kb) kp = ka;  
  1147         for (j = 0, jp = jlo;  j < s_size;  j++, jp++) {
  1148           if (jp > jb) jp = ja;  
  1149           jkoff = (koff + jp) * ni;
  1150           cx = yphi[j] * zphi[k];
  1151           cy = dyphi[j] * zphi[k];
  1152           cz = yphi[j] * dzphi[k];
  1153           for (i = 0, ip = ilo;  i < s_size;  i++, ip++) {
  1154             if (ip > ib) ip = ia;  
  1158             fx += eh[index] * dxphi[i] * cx;
  1159             fy += eh[index] * xphi[i] * cy;
  1160             fz += eh[index] * xphi[i] * cz;
  1177     for (n = 0;  n < natoms;  n++) {
  1178       double q = atom[4*n + 3];
  1183   for (index = 0;  index < nn;  index++) {
  1184     u += qbuf[index] * ebuf[index];
  1186   msm->uelec += 0.5 * u;
  1195   const NL_Msmgrid_double *qhgrid = &(
msm->qh[level]);
  1196   const double *qh = qhgrid->data;
  1197   NL_Msmgrid_double *q2hgrid = &(
msm->qh[level+1]);
  1198   double *q2h = q2hgrid->data;
  1201   const int ia1 = qhgrid->i0;             
  1202   const int ib1 = ia1 + qhgrid->ni - 1;   
  1203   const int ja1 = qhgrid->j0;             
  1204   const int jb1 = ja1 + qhgrid->nj - 1;   
  1205   const int ka1 = qhgrid->k0;             
  1206   const int kb1 = ka1 + qhgrid->nk - 1;   
  1207   const int ni1 = qhgrid->ni;             
  1208   const int nj1 = qhgrid->nj;             
  1209   const int nk1 = qhgrid->nk;             
  1212   const int ia2 = q2hgrid->i0;            
  1213   const int ib2 = ia2 + q2hgrid->ni - 1;  
  1214   const int ja2 = q2hgrid->j0;            
  1215   const int jb2 = ja2 + q2hgrid->nj - 1;  
  1216   const int ka2 = q2hgrid->k0;            
  1217   const int kb2 = ka2 + q2hgrid->nk - 1;  
  1218   const int nrow_q2 = q2hgrid->ni;
  1219   const int nstride_q2 = nrow_q2 * q2hgrid->nj;
  1226   double *qzd = 
msm->lzd + (-ka1);
  1227   double *qyzd = 
msm->lyzd + (-ka1*nj1 + -ja1);
  1230   const double *phi = NULL;
  1235   int index_plane_q2, index_q2;
  1236   int index_jk, offset_k;
  1241   const int diam_stencil = 2*r_stencil + 1;       
  1243   for (i2 = ia2;  i2 <= ib2;  i2++) {
  1245     for (k = ka1;  k <= kb1;  k++) {
  1248       for (j = ja1;  j <= jb1;  j++) {
  1249         index_jk = offset_k + j;
  1250         offset = index_jk * ni1;
  1254           int lower = im - r_stencil;
  1255           int upper = im + r_stencil;
  1256           if (lower < ia1) lower = ia1;
  1257           if (upper > ib1) upper = ib1;  
  1258           phi = phi_factored + r_stencil;  
  1259           for (i = lower;  i <= upper;  i++) {
  1260             qsum += phi[i-im] * qh[offset + i];
  1264           int ip = im - r_stencil;  
  1265           if (ip < ia1) 
do { ip += ni1; } 
while (ip < ia1);  
  1267           for (i = 0;  i < diam_stencil;  i++, ip++) {
  1268             if (ip > ib1) ip = ia1;  
  1269             qsum += phi[i] * qh[offset + ip];
  1272         qyzd[index_jk] = qsum;
  1277     for (j2 = ja2;  j2 <= jb2;  j2++) {
  1278       index_plane_q2 = j2 * nrow_q2 + i2;
  1280       for (k = ka1;  k <= kb1;  k++) {
  1285           int lower = jm - r_stencil;
  1286           int upper = jm + r_stencil;
  1287           if (lower < ja1) lower = ja1;
  1288           if (upper > jb1) upper = jb1;  
  1289           phi = phi_factored + r_stencil;  
  1290           for (j = lower;  j <= upper;  j++) {
  1291             qsum += phi[j-jm] * qyzd[offset + j];
  1295           int jp = jm - r_stencil;  
  1296           if (jp < ja1) 
do { jp += nj1; } 
while (jp < ja1);  
  1298           for (j = 0;  j < diam_stencil;  j++, jp++) {
  1299             if (jp > jb1) jp = ja1;  
  1300             qsum += phi[j] * qyzd[offset + jp];
  1306       for (k2 = ka2;  k2 <= kb2;  k2++) {
  1307         index_q2 = k2 * nstride_q2 + index_plane_q2;
  1311           int lower = km - r_stencil;
  1312           int upper = km + r_stencil;
  1313           if (lower < ka1) lower = ka1;
  1314           if (upper > kb1) upper = kb1;  
  1315           phi = phi_factored + r_stencil;  
  1316           for (k = lower;  k <= upper;  k++) {
  1317             qsum += phi[k-km] * qzd[k];
  1321           int kp = km - r_stencil;  
  1322           if (kp < ka1) 
do { kp += nk1; } 
while (kp < ka1);  
  1324           for (k = 0;  k < diam_stencil;  k++, kp++) {
  1325             if (kp > kb1) kp = ka1;  
  1326             qsum += phi[k] * qzd[kp];
  1329         q2h[index_q2] = qsum;
  1342   NL_Msmgrid_double *ehgrid = &(
msm->eh[level]);
  1343   double *eh = ehgrid->data;
  1344   const NL_Msmgrid_double *e2hgrid = &(
msm->eh[level+1]);
  1345   const double *e2h = e2hgrid->data;
  1348   const int ia1 = ehgrid->i0;             
  1349   const int ib1 = ia1 + ehgrid->ni - 1;   
  1350   const int ja1 = ehgrid->j0;             
  1351   const int jb1 = ja1 + ehgrid->nj - 1;   
  1352   const int ka1 = ehgrid->k0;             
  1353   const int kb1 = ka1 + ehgrid->nk - 1;   
  1354   const int ni1 = ehgrid->ni;             
  1355   const int nj1 = ehgrid->nj;             
  1356   const int nk1 = ehgrid->nk;             
  1359   const int ia2 = e2hgrid->i0;            
  1360   const int ib2 = ia2 + e2hgrid->ni - 1;  
  1361   const int ja2 = e2hgrid->j0;            
  1362   const int jb2 = ja2 + e2hgrid->nj - 1;  
  1363   const int ka2 = e2hgrid->k0;            
  1364   const int kb2 = ka2 + e2hgrid->nk - 1;  
  1365   const int nrow_e2 = e2hgrid->ni;
  1366   const int nstride_e2 = nrow_e2 * e2hgrid->nj;
  1373   double *ezd = 
msm->lzd + (-ka1);
  1374   double *eyzd = 
msm->lyzd + (-ka1*nj1 + -ja1);
  1376   const size_t size_lzd = nk1 * 
sizeof(double);
  1377   const size_t size_lyzd = nj1 * nk1 * 
sizeof(double);
  1379   const double *phi = NULL;
  1384   int index_plane_e2, index_e2;
  1385   int index_jk, offset_k;
  1390   const int diam_stencil = 2*r_stencil + 1;       
  1392   for (i2 = ia2;  i2 <= ib2;  i2++) {
  1393     memset(
msm->lyzd, 0, size_lyzd);
  1395     for (j2 = ja2;  j2 <= jb2;  j2++) {
  1396       memset(
msm->lzd, 0, size_lzd);
  1397       index_plane_e2 = j2 * nrow_e2 + i2;
  1399       for (k2 = ka2;  k2 <= kb2;  k2++) {
  1400         index_e2 = k2 * nstride_e2 + index_plane_e2;
  1403           int lower = km - r_stencil;
  1404           int upper = km + r_stencil;
  1405           if (lower < ka1) lower = ka1;
  1406           if (upper > kb1) upper = kb1;  
  1407           phi = phi_factored + r_stencil;  
  1408           for (k = lower;  k <= upper;  k++) {
  1409             ezd[k] += phi[k-km] * e2h[index_e2];
  1413           int kp = km - r_stencil;  
  1414           if (kp < ka1) 
do { kp += nk1; } 
while (kp < ka1);  
  1416           for (k = 0;  k < diam_stencil;  k++, kp++) {
  1417             if (kp > kb1) kp = ka1;  
  1418             ezd[kp] += phi[k] * e2h[index_e2];
  1423       for (k = ka1;  k <= kb1;  k++) {
  1427           int lower = jm - r_stencil;
  1428           int upper = jm + r_stencil;
  1429           if (lower < ja1) lower = ja1;
  1430           if (upper > jb1) upper = jb1;  
  1431           phi = phi_factored + r_stencil;  
  1432           for (j = lower;  j <= upper;  j++) {
  1433             eyzd[offset + j] += phi[j-jm] * ezd[k];
  1437           int jp = jm - r_stencil;  
  1438           if (jp < ja1) 
do { jp += nj1; } 
while (jp < ja1);  
  1440           for (j = 0;  j < diam_stencil;  j++, jp++) {
  1441             if (jp > jb1) jp = ja1;  
  1442             eyzd[offset + jp] += phi[j] * ezd[k];
  1449     for (k = ka1;  k <= kb1;  k++) {
  1452       for (j = ja1;  j <= jb1;  j++) {
  1453         index_jk = offset_k + j;
  1454         offset = index_jk * ni1;
  1457           int lower = im - r_stencil;
  1458           int upper = im + r_stencil;
  1459           if (lower < ia1) lower = ia1;
  1460           if (upper > ib1) upper = ib1;  
  1461           phi = phi_factored + r_stencil;  
  1462           for (i = lower;  i <= upper;  i++) {
  1463             eh[offset + i] += phi[i-im] * eyzd[index_jk];
  1467           int ip = im - r_stencil;  
  1468           if (ip < ia1) 
do { ip += ni1; } 
while (ip < ia1);  
  1470           for (i = 0;  i < diam_stencil;  i++, ip++) {
  1471             if (ip > ib1) ip = ia1;  
  1472             eh[offset + ip] += phi[i] * eyzd[index_jk];
  1487   const NL_Msmgrid_double *qhgrid = &(
msm->qh[level]);
  1488   const double *qh = qhgrid->data;        
  1489   NL_Msmgrid_double *q2hgrid = &(
msm->qh[level+1]);
  1490   double *q2h_buffer = q2hgrid->buffer;   
  1493   const int ia1 = qhgrid->i0;             
  1494   const int ib1 = ia1 + qhgrid->ni - 1;   
  1495   const int ja1 = qhgrid->j0;             
  1496   const int jb1 = ja1 + qhgrid->nj - 1;   
  1497   const int ka1 = qhgrid->k0;             
  1498   const int kb1 = ka1 + qhgrid->nk - 1;   
  1499   const int ni1 = qhgrid->ni;             
  1500   const int nj1 = qhgrid->nj;             
  1501   const int nk1 = qhgrid->nk;             
  1504   const int ia2 = q2hgrid->i0;            
  1505   const int ib2 = ia2 + q2hgrid->ni - 1;  
  1506   const int ja2 = q2hgrid->j0;            
  1507   const int jb2 = ja2 + q2hgrid->nj - 1;  
  1508   const int ka2 = q2hgrid->k0;            
  1509   const int kb2 = ka2 + q2hgrid->nk - 1;  
  1519   double q2h_sum, cjk;
  1521   int i1, j1, k1, index1, jk1off, k1off;
  1526   for (index2 = 0, k2 = ka2;  k2 <= kb2;  k2++) {
  1528     for (j2 = ja2;  j2 <= jb2;  j2++) {
  1530       for (i2 = ia2;  i2 <= ib2;  i2++, index2++) {
  1534         for (k = 0;  k < nstencil;  k++) {
  1535           k1off = k1 + offset[k];
  1537             if (ispz) 
do { k1off += nk1; } 
while (k1off < ka1);
  1540           else if (k1off > kb1) {
  1541             if (ispz) 
do { k1off -= nk1; } 
while (k1off > kb1);
  1545           for (j = 0;  j < nstencil;  j++) {
  1546             jk1off = j1 + offset[j];
  1548               if (ispy) 
do { jk1off += nj1; } 
while (jk1off < ja1);
  1551             else if (jk1off > jb1) {
  1552               if (ispy) 
do { jk1off -= nj1; } 
while (jk1off > jb1);
  1555             jk1off = (k1off + jk1off) * ni1;
  1556             cjk = phi[j] * phi[k];
  1557             for (i = 0;  i < nstencil;  i++) {
  1558               index1 = i1 + offset[i];
  1560                 if (ispx) 
do { index1 += ni1; } 
while (index1 < ia1);
  1563               else if (index1 > ib1) {
  1564                 if (ispx) 
do { index1 -= ni1; } 
while (index1 > ib1);
  1568               q2h_sum += qh[index1] * phi[i] * cjk;
  1573         q2h_buffer[index2] = q2h_sum;  
  1584   NL_Msmgrid_double *ehgrid = &(
msm->eh[level]);
  1585   double *eh = ehgrid->data;        
  1586   const NL_Msmgrid_double *e2hgrid = &(
msm->eh[level+1]);
  1587   const double *e2h_buffer = e2hgrid->buffer;   
  1590   const int ia1 = ehgrid->i0;             
  1591   const int ib1 = ia1 + ehgrid->ni - 1;   
  1592   const int ja1 = ehgrid->j0;             
  1593   const int jb1 = ja1 + ehgrid->nj - 1;   
  1594   const int ka1 = ehgrid->k0;             
  1595   const int kb1 = ka1 + ehgrid->nk - 1;   
  1596   const int ni1 = ehgrid->ni;             
  1597   const int nj1 = ehgrid->nj;             
  1598   const int nk1 = ehgrid->nk;             
  1601   const int ia2 = e2hgrid->i0;            
  1602   const int ib2 = ia2 + e2hgrid->ni - 1;  
  1603   const int ja2 = e2hgrid->j0;            
  1604   const int jb2 = ja2 + e2hgrid->nj - 1;  
  1605   const int ka2 = e2hgrid->k0;            
  1606   const int kb2 = ka2 + e2hgrid->nk - 1;  
  1618   int i1, j1, k1, index1, jk1off, k1off;
  1623   for (index2 = 0, k2 = ka2;  k2 <= kb2;  k2++) {
  1625     for (j2 = ja2;  j2 <= jb2;  j2++) {
  1627       for (i2 = ia2;  i2 <= ib2;  i2++, index2++) {
  1630         for (k = 0;  k < nstencil;  k++) {
  1631           k1off = k1 + offset[k];
  1633             if (ispz) 
do { k1off += nk1; } 
while (k1off < ka1);
  1636           else if (k1off > kb1) {
  1637             if (ispz) 
do { k1off -= nk1; } 
while (k1off > kb1);
  1641           for (j = 0;  j < nstencil;  j++) {
  1642             jk1off = j1 + offset[j];
  1644               if (ispy) 
do { jk1off += nj1; } 
while (jk1off < ja1);
  1647             else if (jk1off > jb1) {
  1648               if (ispy) 
do { jk1off -= nj1; } 
while (jk1off > jb1);
  1651             jk1off = (k1off + jk1off) * ni1;
  1652             cjk = phi[j] * phi[k];
  1653             for (i = 0;  i < nstencil;  i++) {
  1654               index1 = i1 + offset[i];
  1656                 if (ispx) 
do { index1 += ni1; } 
while (index1 < ia1);
  1659               else if (index1 > ib1) {
  1660                 if (ispx) 
do { index1 -= ni1; } 
while (index1 > ib1);
  1664               eh[index1] += e2h_buffer[index2] * phi[i] * cjk;
  1682   const NL_Msmgrid_double *qhgrid = &(
msm->qh[level]);
  1683   const double *qh = qhgrid->data;
  1684   NL_Msmgrid_double *ehgrid = &(
msm->eh[level]);
  1685   double *eh = ehgrid->data;
  1686   const int ia = qhgrid->i0;            
  1687   const int ib = ia + qhgrid->ni - 1;   
  1688   const int ja = qhgrid->j0;            
  1689   const int jb = ja + qhgrid->nj - 1;   
  1690   const int ka = qhgrid->k0;            
  1691   const int kb = ka + qhgrid->nk - 1;   
  1692   const int ni = qhgrid->ni;            
  1693   const int nj = qhgrid->nj;            
  1694   const int nk = qhgrid->nk;            
  1697   const NL_Msmgrid_double *gcgrid = &(
msm->gc[level]);
  1698   const double *gc = gcgrid->data;
  1699   const int gia = gcgrid->i0;            
  1700   const int gib = gia + gcgrid->ni - 1;  
  1701   const int gja = gcgrid->j0;            
  1702   const int gjb = gja + gcgrid->nj - 1;  
  1703   const int gka = gcgrid->k0;            
  1704   const int gkb = gka + gcgrid->nk - 1;  
  1705   const int gni = gcgrid->ni;            
  1706   const int gnj = gcgrid->nj;            
  1712   const int ispnone = !(ispx || ispy || ispz);
  1715   int gia_clip, gib_clip;
  1716   int gja_clip, gjb_clip;
  1717   int gka_clip, gkb_clip;
  1722   long jknoff, nindex;
  1723   int kgoff, jkgoff, ngindex;
  1728     for (k = ka;  k <= kb;  k++) {
  1731       gka_clip = (k + gka < ka ? ka - k : gka);
  1732       gkb_clip = (k + gkb > kb ? kb - k : gkb);
  1736       for (j = ja;  j <= jb;  j++) {
  1739         gja_clip = (j + gja < ja ? ja - j : gja);
  1740         gjb_clip = (j + gjb > jb ? jb - j : gjb);
  1742         jkoff = (koff + j) * ni;  
  1744         for (i = ia;  i <= ib;  i++) {
  1747           gia_clip = (i + gia < ia ? ia - i : gia);
  1748           gib_clip = (i + gib > ib ? ib - i : gib);
  1754           for (kd = gka_clip;  kd <= gkb_clip;  kd++) {
  1755             knoff = (k + kd) * nj;  
  1758             for (jd = gja_clip;  jd <= gjb_clip;  jd++) {
  1759               jknoff = (knoff + (j + jd)) * ni;  
  1760               jkgoff = (kgoff + jd) * gni;       
  1762               for (
id = gia_clip;  
id <= gib_clip;  
id++) {
  1763                 nindex = jknoff + (i + id);  
  1764                 ngindex = jkgoff + id;       
  1772                 eh_sum += qh[nindex] * gc[ngindex];  
  1791     for (k = ka;  k <= kb;  k++) {
  1795         gka_clip = (k + gka < ka ? ka - k : gka);
  1796         gkb_clip = (k + gkb > kb ? kb - k : gkb);
  1797         if (klo < ka) klo = ka;  
  1802         if (klo < ka) 
do { klo += nk; } 
while (klo < ka);
  1808       for (j = ja;  j <= jb;  j++) {
  1812           gja_clip = (j + gja < ja ? ja - j : gja);
  1813           gjb_clip = (j + gjb > jb ? jb - j : gjb);
  1814           if (jlo < ja) jlo = ja;  
  1819           if (jlo < ja) 
do { jlo += nj; } 
while (jlo < ja);
  1823         jkoff = (koff + j) * ni;  
  1825         for (i = ia;  i <= ib;  i++) {
  1829             gia_clip = (i + gia < ia ? ia - i : gia);
  1830             gib_clip = (i + gib > ib ? ib - i : gib);
  1831             if (ilo < ia) ilo = ia;  
  1836             if (ilo < ia) 
do { ilo += ni; } 
while (ilo < ia);
  1844           for (kd = gka_clip, kp = klo;  kd <= gkb_clip;  kd++, kp++) {
  1846             if (kp > kb) kp = ka;  
  1850             for (jd = gja_clip, jp = jlo;  jd <= gjb_clip;  jd++, jp++) {
  1852               if (jp > jb) jp = ja;              
  1853               jknoff = (knoff + jp) * ni;  
  1854               jkgoff = (kgoff + jd) * gni;       
  1856               for (
id = gia_clip, ip = ilo;  
id <= gib_clip;  
id++, ip++) {
  1858                 if (ip > ib) ip = ia;   
  1859                 nindex = jknoff +  ip;  
  1860                 ngindex = jkgoff + id;  
  1868                 eh_sum += qh[nindex] * gc[ngindex];  
 
double wkf_timer_time(wkf_timerhandle v)
 
static int prolongation_factored(NL_Msm *, int level)
 
static int restriction_factored(NL_Msm *, int level)
 
static const int Nstencil[NL_MSM_APPROX_END]
 
static int anterpolation(NL_Msm *)
 
#define GRID_INDEX_CHECK(a, _i, _j, _k)
 
static const double PhiStencilFactored[NL_MSM_APPROX_END][2 *MAX_POLY_DEGREE+1]
 
static const int IndexOffset[NL_MSM_APPROX_END][MAX_NSTENCIL]
 
void wkf_timer_stop(wkf_timerhandle v)
 
static int restriction(NL_Msm *, int level)
 
#define GRID_INDEX(_p, _i, _j, _k)
 
#define STENCIL_1D(_phi, _delta, _approx)
 
static const double PhiStencil[NL_MSM_APPROX_END][MAX_NSTENCIL]
 
static int interpolation(NL_Msm *)
 
static int gridcutoff(NL_Msm *, int level)
 
void wkf_timer_start(wkf_timerhandle v)
 
int NL_msm_compute_long_range(NL_Msm *msm)
 
static const int PolyDegree[NL_MSM_APPROX_END]
 
#define D_STENCIL_1D(_dphi, _phi, _h_1, _delta, _approx)
 
static int prolongation(NL_Msm *, int level)