18 #include "ComputeMoaMgr.decl.h" 20 #define ALLOCA(TYPE,NAME,SIZE) TYPE *NAME = (TYPE *) alloca((SIZE)*sizeof(TYPE)) 22 static void dftmod(
double *bsp_mod,
double *bsp_arr,
int nfft) {
24 double twopi, arg, sum1, sum2;
25 double infft = 1.0/nfft;
30 for (k = 0; k <nfft; ++k) {
33 for (j = 0; j < nfft; ++j) {
34 arg = twopi * k * j * infft;
35 sum1 += bsp_arr[j] * cos(arg);
36 sum2 += bsp_arr[j] * sin(arg);
38 bsp_mod[k] = sum1*sum1 + sum2*sum2;
46 double *M =
new double[3*
order];
47 double *dM =
new double[3*
order];
48 double *scratch =
new double[K];
50 fr[0]=fr[1]=fr[2]=0.0;
52 for (i=0; i<
order; i++) bm[i] = M[i];
53 for (i=
order; i<K; i++) bm[i] = 0.0;
55 for (i=0; i<K; i++) bm[i] = 1.0/scratch[i];
64 : myGrid(grid), k2_start(K2_start), k2_end(K2_end),
65 k3_start(K3_start), k3_end(K3_end) {
66 int K1, K2, K3,
order;
73 exp1 =
new double[K1/2 + 1];
74 exp2 =
new double[K2/2 + 1];
75 exp3 =
new double[K3/2 + 1];
83 #ifdef OPENATOM_VERSION 85 : myGrid(grid), k2_start(K2_start), k2_end(K2_end),
86 k3_start(K3_start), k3_end(K3_end) {
87 int K1, K2, K3,
order;
96 exp1 =
new double[K1/2 + 1];
97 exp2 =
new double[K2/2 + 1];
98 exp3 =
new double[K3/2 + 1];
105 CkPrintf(
"######################################################\n");
106 CkPrintf(
"Entering recvBmX loop on processor %d \n", CkMyPe() );
108 for ( i=0; i<=K1; ++i) {
109 CkPrintf(
"bm1 value pre-send %d = %d \n", i, bm1[i] );
111 for ( i=0; i<=K1; ++i) {
112 CkPrintf(
"bm1 reference pre-send %d = %d \n", i, &bm1[i] );
114 CkPrintf(
"bm1: %d \n", *bm1);
115 moaProxy[CkMyPe()].recvB(K2_start, K2_end, K3_start, K3_end, K1, K2, K3, bm1, bm2, bm3,
order);
123 #endif // OPENATOM_VERSION 147 K1=myGrid.
K1; K2=myGrid.
K2; K3=myGrid.
K3;
149 double recipx = recips[0];
150 double recipy = recips[1];
151 double recipz = recips[2];
153 int ind = k1from*(k2_end-k2_start)*(k3_end-k3_start)*2;
155 for ( k1=k1from; k1<=k1to; ++k1 ) {
156 double m1, m11, b1, xp1;
158 int k1_s = k1<=K1/2 ? k1 : k1-K1;
161 xp1 = i_pi_volume*exp1[abs(k1_s)];
162 for ( k2=k2_start; k2<k2_end; ++k2 ) {
163 double m2, m22, b1b2, xp2;
165 int k2_s = k2<=K2/2 ? k2 : k2-K2;
168 xp2 = exp2[abs(k2_s)]*xp1;
171 if (k1==0 && k2==0 && k3==0) {
176 for (; k3<k3_end; ++k3 ) {
177 double m3, m33, xp3, msq, imsq, vir, fac;
178 double theta3, theta, q2, qr, qc, C;
179 theta3 = bm3[k3] *b1b2;
183 qr = q_arr[ind]; qc=q_arr[ind+1];
184 q2 = 2*(qr*qr + qc*qc)*theta3;
185 if ( (k3 == 0) || ( k3 == K3/2 && ! (K3 & 1) ) ) q2 *= 0.5;
186 msq = m11 + m22 + m33;
191 q_arr[ind+1] *= theta;
192 vir = -2*(piob+imsq);
195 v0 += fac*(1.0+vir*m11);
198 v3 += fac*(1.0+vir*m22);
200 v5 += fac*(1.0+vir*m33);
206 *partialEnergy = 0.5*energy;
207 partialVirial[0] = 0.5*v0;
208 partialVirial[1] = 0.5*v1;
209 partialVirial[2] = 0.5*v2;
210 partialVirial[3] = 0.5*v3;
211 partialVirial[4] = 0.5*v4;
212 partialVirial[5] = 0.5*v5;
215 for (
int i = first; i <= last; ++i ) {
216 void **params = (
void **)param;
218 float *q_arr = (
float *)params[1];
219 double *recips = (
double *)params[2];
220 double *partialEnergy = (
double *)params[3];
221 double *partialVirial = (
double *)params[4];
222 int *unitDist = (
int *)params[5];
224 int unit = unitDist[0];
225 int remains = unitDist[1];
231 k1from = remains*(unit+1)+(i-remains)*unit;
232 k1to = k1from+unit-1;
234 double *pEnergy = partialEnergy+i;
235 double *pVirial = partialVirial+i*6;
253 K1=myGrid.
K1; K2=myGrid.
K2; K3=myGrid.
K3;
260 double recipx = lattice.
a_r().
x;
261 double recipy = lattice.
b_r().
y;
262 double recipz = lattice.
c_r().
z;
264 init_exp(exp1, K1, 0, K1, recipx);
265 init_exp(exp2, K2, k2_start, k2_end, recipy);
266 init_exp(exp3, K3, k3_start, k3_end, recipz);
268 double recips[] = {recipx, recipy, recipz};
269 int NPARTS=CmiMyNodeSize();
270 int maxParts = ( K1 * ( k2_end - k2_start ) * ( k3_end - k3_start ) + 127 ) / 128;
271 if ( NPARTS > maxParts ) NPARTS = maxParts;
272 if ( NPARTS > K1 ) NPARTS = K1;
273 ALLOCA(
double, partialEnergy, NPARTS);
274 ALLOCA(
double, partialVirial, 6*NPARTS);
275 int unitDist[] = {K1/NPARTS, K1%NPARTS};
278 void *params[] = {
this, q_arr, recips, partialEnergy, partialVirial, unitDist};
280 #if CMK_SMP && USE_CKLOOP 302 for(
int i=0; i<NPARTS; i++){
303 v0 += partialVirial[i*6+0];
304 v1 += partialVirial[i*6+1];
305 v2 += partialVirial[i*6+2];
306 v3 += partialVirial[i*6+3];
307 v4 += partialVirial[i*6+4];
308 v5 += partialVirial[i*6+5];
309 energy += partialEnergy[i];
334 K1=myGrid.
K1; K2=myGrid.
K2; K3=myGrid.
K3;
343 #if CMK_SMP && USE_CKLOOP 348 double recipx = lattice.
a_r().
x;
349 double recipy = lattice.
b_r().
y;
350 double recipz = lattice.
c_r().
z;
352 init_exp(exp1, K1, 0, K1, recipx);
353 init_exp(exp2, K2, k2_start, k2_end, recipy);
354 init_exp(exp3, K3, k3_start, k3_end, recipz);
357 for ( k1=0; k1<K1; ++k1 ) {
358 double m1, m11, b1, xp1;
360 int k1_s = k1<=K1/2 ? k1 : k1-K1;
363 xp1 = i_pi_volume*exp1[abs(k1_s)];
364 for ( k2=k2_start; k2<k2_end; ++k2 ) {
365 double m2, m22, b1b2, xp2;
367 int k2_s = k2<=K2/2 ? k2 : k2-K2;
370 xp2 = exp2[abs(k2_s)]*xp1;
372 if ( k1==0 && k2==0 && k3==0 ) {
377 for ( ; k3<k3_end; ++k3 ) {
378 double m3, m33, xp3, msq, imsq, vir, fac;
379 double theta3, theta, q2, qr, qc, C;
380 theta3 = bm3[k3] *b1b2;
384 qr = q_arr[ind]; qc=q_arr[ind+1];
385 q2 = 2*(qr*qr + qc*qc)*theta3;
386 if ( (k3 == 0) || ( k3 == K3/2 && ! (K3 & 1) ) ) q2 *= 0.5;
387 msq = m11 + m22 + m33;
392 q_arr[ind+1] *= theta;
393 vir = -2*(piob+imsq);
396 v0 += fac*(1.0+vir*m11);
399 v3 += fac*(1.0+vir*m22);
401 v5 += fac*(1.0+vir*m33);
407 }
else if ( cross(lattice.
a(),lattice.
b()).unit() == lattice.
c().
unit() ) {
412 double recip3_x = recip3.
x;
413 double recip3_y = recip3.
y;
414 double recip3_z = recip3.
z;
415 init_exp(exp3, K3, k3_start, k3_end, recip3.
length());
418 for ( k1=0; k1<K1; ++k1 ) {
421 int k1_s = k1<=K1/2 ? k1 : k1-K1;
424 for ( k2=k2_start; k2<k2_end; ++k2 ) {
425 double xp2, b1b2, m2_x, m2_y, m2_z;
427 int k2_s = k2<=K2/2 ? k2 : k2-K2;
428 m2_x = m1.
x + k2_s*recip2.
x;
429 m2_y = m1.
y + k2_s*recip2.
y;
430 m2_z = m1.
z + k2_s*recip2.
z;
432 xp2 = i_pi_volume*exp(-piob*(m2_x*m2_x+m2_y*m2_y+m2_z*m2_z));
434 if ( k1==0 && k2==0 && k3==0 ) {
439 for ( ; k3<k3_end; ++k3 ) {
440 double xp3, msq, imsq, vir, fac;
441 double theta3, theta, q2, qr, qc, C;
442 double m_x, m_y, m_z;
443 theta3 = bm3[k3] *b1b2;
444 m_x = m2_x + k3*recip3_x;
445 m_y = m2_y + k3*recip3_y;
446 m_z = m2_z + k3*recip3_z;
447 msq = m_x*m_x + m_y*m_y + m_z*m_z;
449 qr = q_arr[ind]; qc=q_arr[ind+1];
450 q2 = 2*(qr*qr + qc*qc)*theta3;
451 if ( (k3 == 0) || ( k3 == K3/2 && ! (K3 & 1) ) ) q2 *= 0.5;
456 q_arr[ind+1] *= theta;
457 vir = -2*(piob+imsq);
460 v0 += fac*(1.0+vir*m_x*m_x);
461 v1 += fac*vir*m_x*m_y;
462 v2 += fac*vir*m_x*m_z;
463 v3 += fac*(1.0+vir*m_y*m_y);
464 v4 += fac*vir*m_y*m_z;
465 v5 += fac*(1.0+vir*m_z*m_z);
475 double recip3_x = recip3.
x;
476 double recip3_y = recip3.
y;
477 double recip3_z = recip3.
z;
480 for ( k1=0; k1<K1; ++k1 ) {
483 int k1_s = k1<=K1/2 ? k1 : k1-K1;
486 for ( k2=k2_start; k2<k2_end; ++k2 ) {
487 double b1b2, m2_x, m2_y, m2_z;
489 int k2_s = k2<=K2/2 ? k2 : k2-K2;
490 m2_x = m1.
x + k2_s*recip2.
x;
491 m2_y = m1.
y + k2_s*recip2.
y;
492 m2_z = m1.
z + k2_s*recip2.
z;
495 if ( k1==0 && k2==0 && k3==0 ) {
500 for ( ; k3<k3_end; ++k3 ) {
501 double xp3, msq, imsq, vir, fac;
502 double theta3, theta, q2, qr, qc, C;
503 double m_x, m_y, m_z;
504 theta3 = bm3[k3] *b1b2;
505 m_x = m2_x + k3*recip3_x;
506 m_y = m2_y + k3*recip3_y;
507 m_z = m2_z + k3*recip3_z;
508 msq = m_x*m_x + m_y*m_y + m_z*m_z;
510 xp3 = i_pi_volume*exp(-piob*msq);
511 qr = q_arr[ind]; qc=q_arr[ind+1];
512 q2 = 2*(qr*qr + qc*qc)*theta3;
513 if ( (k3 == 0) || ( k3 == K3/2 && ! (K3 & 1) ) ) q2 *= 0.5;
518 q_arr[ind+1] *= theta;
519 vir = -2*(piob+imsq);
522 v0 += fac*(1.0+vir*m_x*m_x);
523 v1 += fac*vir*m_x*m_y;
524 v2 += fac*vir*m_x*m_z;
525 v3 += fac*(1.0+vir*m_y*m_y);
526 v4 += fac*vir*m_y*m_z;
527 v5 += fac*(1.0+vir*m_z*m_z);
535 virial[0] = 0.5 * v0;
536 virial[1] = 0.5 * v1;
537 virial[2] = 0.5 * v2;
538 virial[3] = 0.5 * v3;
539 virial[4] = 0.5 * v4;
540 virial[5] = 0.5 * v5;
560 int maxK1, maxK2, maxK3;
562 K1=myGrid.
K1; K2=myGrid.
K2; K3=myGrid.
K3;
563 maxK1 = K1/2; maxK2 = K2/2; maxK3 = K3/2;
567 double beta3 = ewald*ewald*ewald;
569 double fac2 = -
M_PI*
M_PI/(ewald*ewald);
571 double fac4 =
M_PI/ewald;
574 double recipx = lattice.
a_r().
x;
575 double recipy = lattice.
b_r().
y;
576 double recipz = lattice.
c_r().
z;
580 for ( k1=0; k1<K1; ++k1 ) {
584 int k1_s = k1<=maxK1 ? k1 : k1-K1;
587 for ( k2=k2_start; k2<k2_end; ++k2 ) {
588 double m2, m22, b1b2;
591 int k2_s = k2<=maxK2 ? k2 : k2-K2;
595 for ( k3=k3_start; k3<k3_end; ++k3 ) {
596 double m3, m33, msq, mFreq, vir, tempE;
597 double b1b2b3, eTerm, q2, qr, qc;
599 b1b2b3 = bm3[k3] *b1b2;
603 msq = m11 + m22 + m33;
605 qr = q_arr[ind]; qc=q_arr[ind+1];
607 q2 = 2.0*(qr*qr + qc*qc);
609 if ( (k3 == 0) || ( k3 == maxK3 && ! (K3 & 1) ) ) q2 *= 0.5;
611 eTerm = b1b2b3*box_factor*((beta3 + fac1*msq)*exp(fac2*msq) + fac3*msq*mFreq*erfc(fac4*mFreq));
614 q_arr[ind+1] *= eTerm;
619 vir = 3.0*box_factor*b1b2b3*(fac1*exp(fac2*msq)+fac3*mFreq*erfc(fac4*mFreq))*q2;
620 v0 += tempE + vir*m11;
623 v3 += tempE + vir*m22;
625 v5 += tempE + vir*m33;
634 double recip2_x = recip2.
x;
635 double recip2_y = recip2.
y;
636 double recip2_z = recip2.
z;
637 double recip3_x = recip3.
x;
638 double recip3_y = recip3.
y;
639 double recip3_z = recip3.
z;
642 for ( k1=0; k1<K1; ++k1 ) {
646 int k1_s = k1<=maxK1 ? k1 : k1-K1;
648 for ( k2=k2_start; k2<k2_end; ++k2 ) {
649 double b1b2, m2_x, m2_y, m2_z;
652 int k2_s = k2<=maxK2 ? k2 : k2-K2;
653 m2_x = m1.
x + k2_s*recip2_x;
654 m2_y = m1.
y + k2_s*recip2_y;
655 m2_z = m1.
z + k2_s*recip2_z;
657 for ( k3=k3_start; k3<k3_end; ++k3 ) {
658 double msq, mFreq, vir, tempE;
659 double b1b2b3, eTerm, q2, qr, qc;
660 double m_x, m_y, m_z;
662 b1b2b3 = bm3[k3] *b1b2;
663 m_x = m2_x + k3*recip3_x;
664 m_y = m2_y + k3*recip3_y;
665 m_z = m2_z + k3*recip3_z;
667 msq = m_x*m_x + m_y*m_y + m_z*m_z;
669 qr = q_arr[ind]; qc=q_arr[ind+1];
671 q2 = 2.0*(qr*qr + qc*qc);
673 if ( (k3 == 0) || ( k3 == maxK3 && ! (K3 & 1) ) ) q2 *= 0.5;
675 eTerm = b1b2b3*box_factor*((beta3 + fac1*msq)*exp(fac2*msq) + fac3*msq*mFreq*erfc(fac4*mFreq));
678 q_arr[ind+1] *= eTerm;
683 vir = 3.0*box_factor*b1b2b3*(fac1*exp(fac2*msq)+fac3*mFreq*erfc(fac4*mFreq))*q2;
684 v0 += tempE + vir*m_x*m_x;
687 v3 += tempE + vir*m_y*m_y;
689 v5 += tempE + vir*m_z*m_z;
696 virial[0] = 0.5 * v0;
697 virial[1] = 0.5 * v1;
698 virial[2] = 0.5 * v2;
699 virial[3] = 0.5 * v3;
700 virial[4] = 0.5 * v4;
701 virial[5] = 0.5 * v5;
706 void PmeKSpace::init_exp(
double *xp,
int K,
int k_start,
int k_end,
double recip) {
709 fac = -piob*recip*recip;
710 int i_start = k_start;
712 if ( k_start > K/2 ) {
713 i_start = K - k_end + 1;
714 i_end = K - k_start + 1;
715 }
else if ( k_end > K/2 ) {
717 i_start = K - k_end + 1;
718 if ( k_start < i_start ) i_start = k_start;
721 for (i=i_start; i < i_end; i++)
722 xp[i] = exp(fac*i*i);
void compute_energy_orthogonal_subset(float q_arr[], double *recips, double partialVirial[], double *partialEnergy, int k1from, int k1to)
void compute_b_moduli(double *bm, int K, int order)
NAMD_HOST_DEVICE Vector c() const
SimParameters * simParameters
NAMD_HOST_DEVICE int orthogonal() const
static void dftmod(double *bsp_mod, double *bsp_arr, int nfft)
double compute_energy(float q_arr[], const Lattice &lattice, double ewald, double virial[], int useCkLoop)
NAMD_HOST_DEVICE BigReal length(void) const
double compute_energy_LJPME(float q_arr[], const Lattice &lattice, double LJewald, double virial[], int useCkLoop)
NAMD_HOST_DEVICE BigReal volume(void) const
NAMD_HOST_DEVICE Vector a_r() const
NAMD_HOST_DEVICE Vector b_r() const
NAMD_HOST_DEVICE Vector c_r() const
NAMD_HOST_DEVICE Vector b() const
double compute_energy_orthogonal_helper(float q_arr[], const Lattice &lattice, double ewald, double virial[])
#define ALLOCA(TYPE, NAME, SIZE)
NAMD_HOST_DEVICE Vector a() const
static void compute_b_spline(REAL *__restrict frac, REAL *M, REAL *dM, int order)
static void compute_energy_orthogonal_ckloop(int first, int last, void *result, int paraNum, void *param)
NAMD_HOST_DEVICE Vector unit(void) const
PmeKSpace(PmeGrid grid, int K2_start, int K2_end, int K3_start, int K3_end)