18 #include "ComputeMoaMgr.decl.h"      20 #define ALLOCA(TYPE,NAME,SIZE) TYPE *NAME = (TYPE *) alloca((SIZE)*sizeof(TYPE))    22 static void dftmod(
double *bsp_mod, 
double *bsp_arr, 
int nfft) {
    24   double twopi, arg, sum1, sum2;
    25   double infft = 1.0/nfft;
    30   for (k = 0; k <nfft; ++k) {
    33     for (j = 0; j < nfft; ++j) {
    34       arg = twopi * k * j * infft;
    35       sum1 += bsp_arr[j] * cos(arg);
    36       sum2 += bsp_arr[j] * sin(arg);
    38     bsp_mod[k] = sum1*sum1 + sum2*sum2;
    46   double *M = 
new double[3*
order];
    47   double *dM = 
new double[3*
order];
    48   double *scratch = 
new double[K];
    50   fr[0]=fr[1]=fr[2]=0.0;
    52   for (i=0; i<
order; i++) bm[i] = M[i];
    53   for (i=
order; i<K; i++) bm[i] = 0.0;
    55   for (i=0; i<K; i++) bm[i] = 1.0/scratch[i];
    64   : myGrid(grid), k2_start(K2_start), k2_end(K2_end),
    65         k3_start(K3_start), k3_end(K3_end) {
    66   int K1, K2, K3, 
order;
    73   exp1 = 
new double[K1/2 + 1];
    74   exp2 = 
new double[K2/2 + 1];
    75   exp3 = 
new double[K3/2 + 1];
    83 #ifdef OPENATOM_VERSION    85   : myGrid(grid), k2_start(K2_start), k2_end(K2_end),
    86         k3_start(K3_start), k3_end(K3_end) {
    87   int K1, K2, K3, 
order;
    96   exp1 = 
new double[K1/2 + 1];
    97   exp2 = 
new double[K2/2 + 1];
    98   exp3 = 
new double[K3/2 + 1];
   105     CkPrintf(
"######################################################\n");
   106     CkPrintf(
"Entering recvBmX loop on processor %d \n", CkMyPe() );
   108     for ( i=0; i<=K1; ++i) {
   109     CkPrintf(
"bm1 value pre-send %d = %d \n", i, bm1[i] );
   111     for ( i=0; i<=K1; ++i) {
   112     CkPrintf(
"bm1 reference pre-send %d = %d \n", i, &bm1[i] );
   114     CkPrintf(
"bm1: %d \n", *bm1);    
   115     moaProxy[CkMyPe()].recvB(K2_start, K2_end, K3_start, K3_end, K1, K2, K3, bm1, bm2, bm3, 
order);
   123 #endif // OPENATOM_VERSION   147     K1=myGrid.
K1; K2=myGrid.
K2; K3=myGrid.
K3;
   149     double recipx = recips[0];
   150     double recipy = recips[1];
   151     double recipz = recips[2];
   153     int ind = k1from*(k2_end-k2_start)*(k3_end-k3_start)*2;
   155     for ( k1=k1from; k1<=k1to; ++k1 ) {        
   156         double m1, m11, b1, xp1;
   158         int k1_s = k1<=K1/2 ? k1 : k1-K1;
   161         xp1 = i_pi_volume*exp1[abs(k1_s)];
   162         for ( k2=k2_start; k2<k2_end; ++k2 ) {
   163             double m2, m22, b1b2, xp2;
   165             int k2_s = k2<=K2/2 ? k2 : k2-K2;
   168             xp2 = exp2[abs(k2_s)]*xp1;
   171             if (k1==0 && k2==0 && k3==0) {
   176             for (; k3<k3_end; ++k3 ) {
   177               double m3, m33, xp3, msq, imsq, vir, fac;
   178               double theta3, theta, q2, qr, qc, C;
   179               theta3 = bm3[k3] *b1b2;
   183               qr = q_arr[ind]; qc=q_arr[ind+1];
   184               q2 = 2*(qr*qr + qc*qc)*theta3;
   185               if ( (k3 == 0) || ( k3 == K3/2 && ! (K3 & 1) ) ) q2 *= 0.5;
   186               msq = m11 + m22 + m33;
   191               q_arr[ind+1] *= theta;
   192               vir = -2*(piob+imsq);
   195               v0 += fac*(1.0+vir*m11);
   198               v3 += fac*(1.0+vir*m22);
   200               v5 += fac*(1.0+vir*m33);
   206     *partialEnergy = 0.5*energy;
   207     partialVirial[0] = 0.5*v0;
   208     partialVirial[1] = 0.5*v1;
   209     partialVirial[2] = 0.5*v2;
   210     partialVirial[3] = 0.5*v3;
   211     partialVirial[4] = 0.5*v4;
   212     partialVirial[5] = 0.5*v5;
   215   for ( 
int i = first; i <= last; ++i ) {
   216     void **params = (
void **)param;
   218     float *q_arr = (
float *)params[1];
   219     double *recips = (
double *)params[2];
   220     double *partialEnergy = (
double *)params[3];
   221     double *partialVirial = (
double *)params[4];
   222     int *unitDist = (
int *)params[5];
   224     int unit = unitDist[0];
   225     int remains = unitDist[1];
   231         k1from = remains*(unit+1)+(i-remains)*unit;
   232         k1to = k1from+unit-1;
   234     double *pEnergy = partialEnergy+i;
   235     double *pVirial = partialVirial+i*6;
   253   K1=myGrid.
K1; K2=myGrid.
K2; K3=myGrid.
K3;
   260     double recipx = lattice.
a_r().
x;
   261     double recipy = lattice.
b_r().
y;
   262     double recipz = lattice.
c_r().
z;
   264     init_exp(exp1, K1, 0, K1, recipx);
   265     init_exp(exp2, K2, k2_start, k2_end, recipy);
   266     init_exp(exp3, K3, k3_start, k3_end, recipz);
   268     double recips[] = {recipx, recipy, recipz};
   269     int NPARTS=CmiMyNodeSize(); 
   270     int maxParts = ( K1 * ( k2_end - k2_start ) * ( k3_end - k3_start ) + 127 ) / 128;
   271     if ( NPARTS >  maxParts ) NPARTS = maxParts;
   272     if ( NPARTS >  K1 ) NPARTS = K1; 
   273     ALLOCA(
double, partialEnergy, NPARTS);
   274     ALLOCA(
double, partialVirial, 6*NPARTS);
   275     int unitDist[] = {K1/NPARTS, K1%NPARTS};
   278     void *params[] = {
this, q_arr, recips, partialEnergy, partialVirial, unitDist};
   280 #if     CMK_SMP && USE_CKLOOP   302     for(
int i=0; i<NPARTS; i++){
   303         v0 += partialVirial[i*6+0];
   304         v1 += partialVirial[i*6+1];
   305         v2 += partialVirial[i*6+2];
   306         v3 += partialVirial[i*6+3];
   307         v4 += partialVirial[i*6+4];
   308         v5 += partialVirial[i*6+5];
   309         energy += partialEnergy[i];
   334   K1=myGrid.
K1; K2=myGrid.
K2; K3=myGrid.
K3;
   343 #if     CMK_SMP && USE_CKLOOP   348     double recipx = lattice.
a_r().
x;
   349     double recipy = lattice.
b_r().
y;
   350     double recipz = lattice.
c_r().
z;
   352     init_exp(exp1, K1, 0, K1, recipx);
   353     init_exp(exp2, K2, k2_start, k2_end, recipy);
   354     init_exp(exp3, K3, k3_start, k3_end, recipz);
   357     for ( k1=0; k1<K1; ++k1 ) {
   358       double m1, m11, b1, xp1;
   360       int k1_s = k1<=K1/2 ? k1 : k1-K1;
   363       xp1 = i_pi_volume*exp1[abs(k1_s)];
   364       for ( k2=k2_start; k2<k2_end; ++k2 ) {
   365         double m2, m22, b1b2, xp2;
   367         int k2_s = k2<=K2/2 ? k2 : k2-K2;
   370         xp2 = exp2[abs(k2_s)]*xp1;
   372         if ( k1==0 && k2==0 && k3==0 ) {
   377         for ( ; k3<k3_end; ++k3 ) {
   378           double m3, m33, xp3, msq, imsq, vir, fac;
   379           double theta3, theta, q2, qr, qc, C;
   380           theta3 = bm3[k3] *b1b2;
   384           qr = q_arr[ind]; qc=q_arr[ind+1];
   385           q2 = 2*(qr*qr + qc*qc)*theta3;
   386           if ( (k3 == 0) || ( k3 == K3/2 && ! (K3 & 1) ) ) q2 *= 0.5;
   387           msq = m11 + m22 + m33;
   392           q_arr[ind+1] *= theta;
   393           vir = -2*(piob+imsq);
   396           v0 += fac*(1.0+vir*m11);
   399           v3 += fac*(1.0+vir*m22);
   401           v5 += fac*(1.0+vir*m33);
   407   } 
else if ( cross(lattice.
a(),lattice.
b()).unit() == lattice.
c().
unit() ) {
   412     double recip3_x = recip3.
x;
   413     double recip3_y = recip3.
y;
   414     double recip3_z = recip3.
z;
   415     init_exp(exp3, K3, k3_start, k3_end, recip3.
length());
   418     for ( k1=0; k1<K1; ++k1 ) {
   421       int k1_s = k1<=K1/2 ? k1 : k1-K1;
   424       for ( k2=k2_start; k2<k2_end; ++k2 ) {
   425         double xp2, b1b2, m2_x, m2_y, m2_z;
   427         int k2_s = k2<=K2/2 ? k2 : k2-K2;
   428         m2_x = m1.
x + k2_s*recip2.
x;
   429         m2_y = m1.
y + k2_s*recip2.
y;
   430         m2_z = m1.
z + k2_s*recip2.
z;
   432         xp2 = i_pi_volume*exp(-piob*(m2_x*m2_x+m2_y*m2_y+m2_z*m2_z));
   434         if ( k1==0 && k2==0 && k3==0 ) {
   439         for ( ; k3<k3_end; ++k3 ) {
   440           double xp3, msq, imsq, vir, fac;
   441           double theta3, theta, q2, qr, qc, C;
   442           double m_x, m_y, m_z;
   443           theta3 = bm3[k3] *b1b2;
   444           m_x = m2_x + k3*recip3_x;
   445           m_y = m2_y + k3*recip3_y;
   446           m_z = m2_z + k3*recip3_z;
   447           msq = m_x*m_x + m_y*m_y + m_z*m_z;
   449           qr = q_arr[ind]; qc=q_arr[ind+1];
   450           q2 = 2*(qr*qr + qc*qc)*theta3;
   451           if ( (k3 == 0) || ( k3 == K3/2 && ! (K3 & 1) ) ) q2 *= 0.5;
   456           q_arr[ind+1] *= theta;
   457           vir = -2*(piob+imsq);
   460           v0 += fac*(1.0+vir*m_x*m_x);
   461           v1 += fac*vir*m_x*m_y;
   462           v2 += fac*vir*m_x*m_z;
   463           v3 += fac*(1.0+vir*m_y*m_y);
   464           v4 += fac*vir*m_y*m_z;
   465           v5 += fac*(1.0+vir*m_z*m_z);
   475     double recip3_x = recip3.
x;
   476     double recip3_y = recip3.
y;
   477     double recip3_z = recip3.
z;
   480     for ( k1=0; k1<K1; ++k1 ) {
   483       int k1_s = k1<=K1/2 ? k1 : k1-K1;
   486       for ( k2=k2_start; k2<k2_end; ++k2 ) {
   487         double b1b2, m2_x, m2_y, m2_z;
   489         int k2_s = k2<=K2/2 ? k2 : k2-K2;
   490         m2_x = m1.
x + k2_s*recip2.
x;
   491         m2_y = m1.
y + k2_s*recip2.
y;
   492         m2_z = m1.
z + k2_s*recip2.
z;
   495         if ( k1==0 && k2==0 && k3==0 ) {
   500         for ( ; k3<k3_end; ++k3 ) {
   501           double xp3, msq, imsq, vir, fac;
   502           double theta3, theta, q2, qr, qc, C;
   503           double m_x, m_y, m_z;
   504           theta3 = bm3[k3] *b1b2;
   505           m_x = m2_x + k3*recip3_x;
   506           m_y = m2_y + k3*recip3_y;
   507           m_z = m2_z + k3*recip3_z;
   508           msq = m_x*m_x + m_y*m_y + m_z*m_z;
   510           xp3 = i_pi_volume*exp(-piob*msq);
   511           qr = q_arr[ind]; qc=q_arr[ind+1];
   512           q2 = 2*(qr*qr + qc*qc)*theta3;
   513           if ( (k3 == 0) || ( k3 == K3/2 && ! (K3 & 1) ) ) q2 *= 0.5;
   518           q_arr[ind+1] *= theta;
   519           vir = -2*(piob+imsq);
   522           v0 += fac*(1.0+vir*m_x*m_x);
   523           v1 += fac*vir*m_x*m_y;
   524           v2 += fac*vir*m_x*m_z;
   525           v3 += fac*(1.0+vir*m_y*m_y);
   526           v4 += fac*vir*m_y*m_z;
   527           v5 += fac*(1.0+vir*m_z*m_z);
   535   virial[0] = 0.5 * v0;
   536   virial[1] = 0.5 * v1;
   537   virial[2] = 0.5 * v2;
   538   virial[3] = 0.5 * v3;
   539   virial[4] = 0.5 * v4;
   540   virial[5] = 0.5 * v5;
   560   int maxK1, maxK2, maxK3;
   562   K1=myGrid.
K1; K2=myGrid.
K2; K3=myGrid.
K3;
   563   maxK1 = K1/2; maxK2 = K2/2; maxK3 = K3/2;
   567   double beta3 = ewald*ewald*ewald;
   569   double fac2 = -
M_PI*
M_PI/(ewald*ewald);
   571   double fac4 = 
M_PI/ewald;
   574     double recipx = lattice.
a_r().
x;
   575     double recipy = lattice.
b_r().
y;
   576     double recipz = lattice.
c_r().
z;
   580     for ( k1=0; k1<K1; ++k1 ) {
   584       int k1_s = k1<=maxK1 ? k1 : k1-K1;
   587       for ( k2=k2_start; k2<k2_end; ++k2 ) {
   588         double m2, m22, b1b2;
   591         int k2_s = k2<=maxK2 ? k2 : k2-K2;
   595         for ( k3=k3_start; k3<k3_end; ++k3 ) {
   596           double m3, m33, msq, mFreq, vir, tempE;
   597           double b1b2b3, eTerm, q2, qr, qc;
   599           b1b2b3 = bm3[k3] *b1b2;
   603           msq = m11 + m22 + m33;
   605           qr = q_arr[ind]; qc=q_arr[ind+1];
   607           q2 = 2.0*(qr*qr + qc*qc);
   609           if ( (k3 == 0) || ( k3 == maxK3 && ! (K3 & 1) ) ) q2 *= 0.5;
   611           eTerm = b1b2b3*box_factor*((beta3 + fac1*msq)*exp(fac2*msq) + fac3*msq*mFreq*erfc(fac4*mFreq));
   614           q_arr[ind+1] *= eTerm;
   619           vir = 3.0*box_factor*b1b2b3*(fac1*exp(fac2*msq)+fac3*mFreq*erfc(fac4*mFreq))*q2;
   620           v0 += tempE + vir*m11;
   623           v3 += tempE + vir*m22;
   625           v5 += tempE + vir*m33;
   634     double recip2_x = recip2.
x;
   635     double recip2_y = recip2.
y;
   636     double recip2_z = recip2.
z;
   637     double recip3_x = recip3.
x;
   638     double recip3_y = recip3.
y;
   639     double recip3_z = recip3.
z;
   642     for ( k1=0; k1<K1; ++k1 ) {
   646       int k1_s = k1<=maxK1 ? k1 : k1-K1;
   648       for ( k2=k2_start; k2<k2_end; ++k2 ) {
   649         double b1b2, m2_x, m2_y, m2_z;
   652         int k2_s = k2<=maxK2 ? k2 : k2-K2;
   653         m2_x = m1.
x + k2_s*recip2_x;
   654         m2_y = m1.
y + k2_s*recip2_y;
   655         m2_z = m1.
z + k2_s*recip2_z;
   657         for ( k3=k3_start; k3<k3_end; ++k3 ) {
   658           double msq, mFreq, vir, tempE;
   659           double b1b2b3, eTerm, q2, qr, qc;
   660           double m_x, m_y, m_z;
   662           b1b2b3 = bm3[k3] *b1b2;
   663           m_x = m2_x + k3*recip3_x;
   664           m_y = m2_y + k3*recip3_y;
   665           m_z = m2_z + k3*recip3_z;
   667           msq = m_x*m_x + m_y*m_y + m_z*m_z;
   669           qr = q_arr[ind]; qc=q_arr[ind+1];
   671           q2 = 2.0*(qr*qr + qc*qc);
   673           if ( (k3 == 0) || ( k3 == maxK3 && ! (K3 & 1) ) ) q2 *= 0.5;
   675           eTerm = b1b2b3*box_factor*((beta3 + fac1*msq)*exp(fac2*msq) + fac3*msq*mFreq*erfc(fac4*mFreq));
   678           q_arr[ind+1] *= eTerm;
   683           vir = 3.0*box_factor*b1b2b3*(fac1*exp(fac2*msq)+fac3*mFreq*erfc(fac4*mFreq))*q2;
   684           v0 += tempE + vir*m_x*m_x;
   687           v3 += tempE + vir*m_y*m_y;
   689           v5 += tempE + vir*m_z*m_z;
   696   virial[0] = 0.5 * v0;
   697   virial[1] = 0.5 * v1;
   698   virial[2] = 0.5 * v2;
   699   virial[3] = 0.5 * v3;
   700   virial[4] = 0.5 * v4;
   701   virial[5] = 0.5 * v5;
   706 void PmeKSpace::init_exp(
double *xp, 
int K, 
int k_start, 
int k_end, 
double recip) {
   709   fac = -piob*recip*recip;
   710   int i_start = k_start;
   712   if ( k_start > K/2 ) {
   713     i_start = K - k_end + 1;
   714     i_end = K - k_start + 1;
   715   } 
else if ( k_end > K/2 ) {
   717     i_start = K - k_end + 1;
   718     if ( k_start < i_start ) i_start = k_start;
   721   for (i=i_start; i < i_end; i++)
   722     xp[i] = exp(fac*i*i);
 
void compute_energy_orthogonal_subset(float q_arr[], double *recips, double partialVirial[], double *partialEnergy, int k1from, int k1to)
 
void compute_b_moduli(double *bm, int K, int order)
 
NAMD_HOST_DEVICE Vector c() const
 
SimParameters * simParameters
 
NAMD_HOST_DEVICE int orthogonal() const
 
static void dftmod(double *bsp_mod, double *bsp_arr, int nfft)
 
double compute_energy(float q_arr[], const Lattice &lattice, double ewald, double virial[], int useCkLoop)
 
NAMD_HOST_DEVICE BigReal length(void) const
 
double compute_energy_LJPME(float q_arr[], const Lattice &lattice, double LJewald, double virial[], int useCkLoop)
 
NAMD_HOST_DEVICE BigReal volume(void) const
 
NAMD_HOST_DEVICE Vector a_r() const
 
NAMD_HOST_DEVICE Vector b_r() const
 
NAMD_HOST_DEVICE Vector c_r() const
 
NAMD_HOST_DEVICE Vector b() const
 
double compute_energy_orthogonal_helper(float q_arr[], const Lattice &lattice, double ewald, double virial[])
 
#define ALLOCA(TYPE, NAME, SIZE)
 
NAMD_HOST_DEVICE Vector a() const
 
static void compute_b_spline(REAL *__restrict frac, REAL *M, REAL *dM, int order)
 
static void compute_energy_orthogonal_ckloop(int first, int last, void *result, int paraNum, void *param)
 
NAMD_HOST_DEVICE Vector unit(void) const
 
PmeKSpace(PmeGrid grid, int K2_start, int K2_end, int K3_start, int K3_end)