9 #define CHECK_PRIORITIES 0    55   int unique = (gbisParams->
numPatches == 1) ? 1 : 0;
    59   int cid = gbisParams->
cid;
    65   float a_cut = gbisParams->
a_cut - fsMax;
    66   float a_cut_ps2 = (a_cut+fsMax)*(a_cut+fsMax);
    67   float r_cut = gbisParams->
cutoff;
    68   float a_cut2 = a_cut*a_cut;
    69   float r_cut2 = r_cut*r_cut;
    73   double t1 = 1.0*clock()/CLOCKS_PER_SEC;
    79   float rhoi0, rhois, rhoj0, rhojs, rhois2, rhojs2, rhois2_16;
    80   float ami2, amj2, api2, apj2;
    81   int numGBISPairlists = 4;
    83   float max_gcut2  = r_cut;
    84   if (a_cut+fsMax > r_cut)
    85     max_gcut2 = a_cut+fsMax;
    88   max_gcut2 *= max_gcut2;
    90   int maxGroupPairs = params->
numAtoms[1];
    91   short *groupPairs = 
new short[maxGroupPairs];
    94   for (
int ngi = minIg; ngi < maxI;  ) {
    95     int numGroupPairs = 0;
   105       dr = ngri.
x - ngrj.
x;
   107       dr = ngri.
y - ngrj.
y;
   109       dr = ngri.
z - ngrj.
z;
   111       if (r2 < max_gcut2) {
   112         groupPairs[numGroupPairs++] = ngj;
   118     for (
int i=ngi; i<ngi+iGroupSize; i++) {
   121       int *size = 
new int[numGBISPairlists];
   123       for (
int k = 0; k < numGBISPairlists; k++) {
   134       rhois = gbisParams->
intRad[0][2*i+1];
   135       api2 = a_cut + rhois;
   137       ami2 = a_cut - rhois;
   139       rhois2 = rhois*rhois;
   140       rhois2_16 = 16.0*rhois2;
   144       for (
int j = i+1; j < ngi+iGroupSize; j++) {
   157           rhojs = gbisParams->
intRad[1][2*j+1];
   158           rhojs2 = rhojs*rhojs;
   160           amj2 = a_cut - rhojs;
   163           apj2 = a_cut + rhojs;
   167           if (  r2 < ami2 && r2 < amj2 &&
   168             r2 > rhois2_16 && r2 > 16.0*rhojs2 ) {
   170             pairs[0][size[0]++] = j;
   171           } 
else if ( r2 < api2 && r2 < apj2 &&
   172             r2 > ami2 && r2 > amj2 ) {
   174             pairs[1][size[1]++] = j;
   175           } 
else if ( r2 < a_cut_ps2 ) {
   177             pairs[2][size[2]++] = j;
   178           } 
else if ( r2 < r_cut2 ) {
   180             pairs[3][size[3]++] = j;
   185       for (
int g = 0; g < numGroupPairs; g++) {
   186         int ngj = groupPairs[g];
   189         int maxJ = ngj+jGroupSize;
   190         for (
int j = ngj; j < maxJ; j++) {
   205           rhojs = gbisParams->
intRad[1][2*j+1];
   206           rhojs2 = rhojs*rhojs;
   208           amj2 = a_cut - rhojs;
   211           apj2 = a_cut + rhojs;
   214           if (  r2 < ami2 && r2 < amj2 &&
   215             r2 > rhois2_16 && r2 > 16.0*rhojs2 ) {
   218             pairs[0][size[0]++] = j;
   219           } 
else if ( r2 < api2 && r2 < apj2 &&
   220             r2 > ami2 && r2 > amj2 ) {
   223             pairs[1][size[1]++] = j;
   224           } 
else if ( r2 < a_cut_ps2 ) {
   227             pairs[2][size[2]++] = j;
   228           } 
else if ( r2 < r_cut2 ) {
   231             pairs[3][size[3]++] = j;
   235     for (
int k = 0; k < numGBISPairlists; k++) {
   244     for (
int s = 0; s < strideIg; s++) {
   249   for (
int k = 0; k < numGBISPairlists; k++)
   253   double t2 = 1.0*clock()/CLOCKS_PER_SEC;
   254   CkPrintf(
"PHASELST: %8.3f ms @ %8.3f ns/iter for %i iter\n",1000.0*(t2-t1), 1000000000.0*(t2-t1)/nops, nops);
   264 CkPrintf(
"PE%i, S%09i, P%i\n",CkMyPe(),gbisParams->
sequence,gbisParams->
gbisPhase);
   274   for (
int s = 0; s < params->
minPart; s++) {
   280   int unique = (gbisParams->
numPatches == 1) ? 1 : 0;
   281   int numGBISPairlists = 4;
   282   float r_cut = gbisParams->
cutoff;
   284   float a_cut = gbisParams->
a_cut-fsMax;
   285   float a_cut2 = a_cut*a_cut;
   286   float a_cut_ps = a_cut + fsMax;
   287   float r_cut2 = r_cut*r_cut;
   288   float a_cut_ps2 = a_cut_ps*a_cut_ps;
   290   for (
int k = 0; k < numGBISPairlists; k++)
   304   int domains[] = {0, 0, 0, 0, 0, 0, 0, 0};
   306   double t1 = 1.0*clock()/CLOCKS_PER_SEC;
   312   register float r, r_i, r2_i;
   313   float rhoi0, rhois, rhojs, rhoj0;
   317   register float ta = 
TA;
   318   register float tb = 
TB;
   319   register float tc = 
TC;
   320   register float td = 
TD;
   321   register float te = 
TE;
   323   register float hij,hji;
   326   float rhois2, rhojs2;
   330   for (
int ngi = minIg; ngi < maxI;  ) {
   332   for (
int i = ngi; i < ngi+iGroupSize; i++) {
   337     rhoi0 = gbisParams->
intRad[0][2*i+0];
   338     rhois = gbisParams->
intRad[0][2*i+1];
   339     rhois2 = rhois*rhois;
   343     for (
register int jj = 0; jj < numPairs; jj++) {
   358       rhoj0 = gbisParams->
intRad[1][2*j+0];
   359       rhojs = gbisParams->
intRad[1][2*j+1];
   360       rhojs2 = rhojs*rhojs;
   363       hij = rhojs*r2_i*k*(ta+k*(tb+k*(tc+k*(td+k*te))));
   366       hji = rhois*r2_i*k*(ta+k*(tb+k*(tc+k*(td+k*te))));
   370       int id1 = params->
pExt[0][i].
id;
   371       int id2 = params->
pExt[1][j].
id;
   404       gbisParams->
psiSum[1][j] += hji;
   406     gbisParams->
psiSum[0][i] += psiI;
   408   for (
int s = 0; s < strideIg; s++) {
   413   double t2 = 1.0*clock()/CLOCKS_PER_SEC;
   417   CkPrintf(
"PHASE1.1: %8.3f ms @ %8.3f ns/iter for %i iter\n",1000.0*(t2-t1), 1000000000.0*(t2-t1)/nops, nops);
   419   t1 = 1.0*clock()/CLOCKS_PER_SEC;
   429   float a_cut_i = 1.0 / a_cut;
   430   float a_cut_i2 = a_cut_i*a_cut_i;
   434   for (
int ngi = minIg; ngi < maxI;  ) {
   436   for (
int i = ngi; i < ngi+iGroupSize; i++) {
   441     rhoi0 = gbisParams->
intRad[0][2*i+0];
   442     rhois = gbisParams->
intRad[0][2*i+1];
   446     for (
register int jj = 0; jj < numPairs; jj++) {
   459       rhoj0 = gbisParams->
intRad[1][2*j+0];
   460       rhojs = gbisParams->
intRad[1][2*j+1];
   462       float tmp1 = 0.125*r_i;
   463       float tmp2 = r2 - 4.0*a_cut*r;
   469       logri = log(rmris*a_cut_i);
   470       logrj = log(rmrjs*a_cut_i);
   475       hij = tmp1*(1 + rr*rmrsi +
   476         a_cut_i2*(tmp2 - rs2) + logrj+logrj);
   482       hji = tmp1*(1 + rr*rmrsi +
   483         a_cut_i2*(tmp2 - rs2) + 2.0* logri);
   486       int id1 = params->
pExt[0][i].
id;
   487       int id2 = params->
pExt[1][j].
id;
   524       gbisParams->
psiSum[1][j] += hji;
   526     gbisParams->
psiSum[0][i] += psiI;
   528   for (
int s = 0; s < strideIg; s++) {
   533   t2 = 1.0*clock()/CLOCKS_PER_SEC;
   534   CkPrintf(
"PHASE1.2: %8.3f ms @ %8.3f ns/iter for %i iter\n",1000.0*(t2-t1), 1000000000.0*(t2-t1)/nops, nops);
   536   t1 = 1.0*clock()/CLOCKS_PER_SEC;
   542   for (
int ngi = minIg; ngi < maxI;  ) {
   544   for (
int i = ngi; i < ngi+iGroupSize; i++) {
   549     rhoi0 = gbisParams->
intRad[0][2*i+0];
   550     rhois = gbisParams->
intRad[0][2*i+1];
   554     for (
register int jj = 0; jj < numPairs; jj++) {
   564       rhojs = gbisParams->
intRad[1][2*j+1];
   565       rhoj0 = gbisParams->
intRad[1][2*j+0];
   575       int id1 = params->
pExt[0][i].
id;
   576       int id2 = params->
pExt[1][j].
id;
   620       gbisParams->
psiSum[1][j] += hji;
   622     gbisParams->
psiSum[0][i] += psiI;
   624   for (
int s = 0; s < strideIg; s++) {
   630   t2 = 1.0*clock()/CLOCKS_PER_SEC;
   631   CkPrintf(
"PHASE1.3: %8.3f ms @ %8.3f ns/iter for %i iter\n",1000.0*(t2-t1), 1000000000.0*(t2-t1)/nops,nops);
   641   float epsilon_s_i = 1/epsilon_s;
   642   float epsilon_p_i = 1/epsilon_p;
   643   float kappa = gbisParams->
kappa;
   646   float r_cut_2 = 1.0 / r_cut2;
   647   float r_cut_4 = 4.0*r_cut_2*r_cut_2;
   648   float coulEij=0,ddrCoulEij=0,gbEij=0,ddrGbEij=0;
   649   float dEdai=0,dEdaj=0, qiqj=0;
   650   float scale=0, ddrScale=0;
   651   float rnx=0,rny=0,rnz=0;
   652   float fx=0,fy=0,fz=0,forceCoul=0, forcedEdr=0;
   655   double t1 = 1.0*clock()/CLOCKS_PER_SEC;
   662   float bornRadI, bornRadJ;
   665   float aiaj,expr2aiaj4,fij,f_i,expkappa,Dij;
   666   float aiaj4,ddrDij,ddrf_i,ddrfij,tmp_dEda;
   668   for (
int c = 0; c < 4; c++) {
   669   for (
int ngi = minIg; ngi < maxI;  ) {
   671   for (
int i = ngi; i < ngi+iGroupSize; i++) {
   681     fIx = fIy = fIz = 0.0;
   683     bornRadI = gbisParams->
bornRad[0][i];
   684     for (
int jj = 0; jj < numPairs; jj++) {
   692       r2 = dx*dx + dy*dy + dz*dz;
   693       if (r2 > r_cut2) 
continue;
   694       qiqj = qi*params->
p[1][j].
charge;
   695       bornRadJ = gbisParams->
bornRad[1][j];
   709   aiaj = bornRadI*bornRadJ;
   711   expr2aiaj4 = exp(-r2/aiaj4);
   712   fij = sqrt(r2+aiaj*expr2aiaj4);
   714   expkappa = (kappa > 0) ? exp(-kappa*fij) : 1.0;
   715   Dij = epsilon_p_i - expkappa*epsilon_s_i;
   716   gbEij = qiqj*Dij*f_i;
   719   ddrfij = r*f_i*(1 - 0.25*expr2aiaj4);
   720   ddrf_i = -ddrfij*f_i*f_i;
   721   ddrDij = kappa*expkappa*ddrfij*epsilon_s_i;
   722   ddrGbEij = qiqj*(ddrDij*f_i+Dij*ddrf_i);
   729         scale = r2 * r_cut_2 - 1;
   731         ddrScale = r*(r2-r_cut2)*r_cut_4;
   734         forcedEdr = -(ddrGbEij)*scale-(gbEij)*ddrScale;
   737         forcedEdr = -ddrGbEij;
   743         tmp_dEda = 0.5*qiqj*f_i*f_i
   744                       *(kappa*epsilon_s_i*expkappa-Dij*f_i)
   745                       *(aiaj+0.25*r2)*expr2aiaj4;
   746         dEdai = tmp_dEda/bornRadI;
   747         dEdaj = tmp_dEda/bornRadJ;
   748         dEdaSumI += dEdai*scale;
   749         gbisParams->
dEdaSum[1][j] += dEdaj*scale;
   754       int id1 = params->
pExt[0][i].
id;
   755       int id2 = params->
pExt[1][j].
id;
   758       float bR1 = bornRadI;
   759       float bR2 = bornRadJ;
   787       params->
ff[1][j].
x -= fx;
   788       params->
ff[1][j].
y -= fy;
   789       params->
ff[1][j].
z -= fz;
   800     gbisParams->
dEdaSum[0][i] += dEdaSumI;
   801     params->
ff[0][i].
x += fIx;
   802     params->
ff[0][i].
y += fIy;
   803     params->
ff[0][i].
z += fIz;
   807       float fij = bornRadI;
   808       float expkappa = exp(-kappa*fij);
   809       float Dij = epsilon_p_i - expkappa*epsilon_s_i;
   810       float gbEij = qi*params->
p[0][i].
charge*Dij/fij;
   814   for (
int s = 0; s < strideIg; s++) {
   820   double t2 = 1.0*clock()/CLOCKS_PER_SEC;
   824   CkPrintf(
"PHASE2.0: %8.3f ms @ %8.3f ns/iter for %i iter\n",1000.0*(t2-t1), 1000000000.0*(t2-t1)/nops,nops);
   834   double t1 = 1.0*clock()/CLOCKS_PER_SEC;
   842   register float r, r_i;
   843   register float rhoi0, rhois;
   847   register float fIx, fIy, fIz;
   853   register float dHdrPrefixI;
   857   register int c, numPairs, jj, j;
   859   register float da = 
DA;
   860   register float db = 
DB;
   861   register float dc = 
DC;
   862   register float dd = 
DD;
   863   register float de = 
DE;
   868   for (
int ngi = minIg; ngi < maxI;  ) {
   870   for (
int i = ngi; i < ngi+iGroupSize; i++) {
   875     rhois = gbisParams->
intRad[0][2*i+1];
   878     fIx = fIy = fIz = 0.0;
   880     for (jj = 0; jj < numPairs; jj++) {
   887       r2 = dx*dx + dy*dy + dz*dz;
   893       rhojs = gbisParams->
intRad[1][2*j+1];
   896       dhij = -rhojs*r_i3*k*
   897               (da+k*(db+k*(dc+k*(dd+k*de))));
   900       dhji = -rhois*r_i3*k*
   901               (da+k*(db+k*(dc+k*(dd+k*de))));
   904       forceAlpha = -r_i*(dHdrPrefixI*dhij+dHdrPrefixJ*dhji);
   905       fx = dx * forceAlpha;
   906       fy = dy * forceAlpha;
   907       fz = dz * forceAlpha;
   909       params->
fullf[1][j].
x -= fx;
   910       params->
fullf[1][j].
y -= fy;
   911       params->
fullf[1][j].
z -= fz;
   919       int id1 = params->
pExt[0][i].
id;
   920       int id2 = params->
pExt[1][j].
id;
   946     params->
fullf[0][i].
x += fIx;
   947     params->
fullf[0][i].
y += fIy;
   948     params->
fullf[0][i].
z += fIz;
   950   for (
int s = 0; s < strideIg; s++) {
   956   t2 = 1.0*clock()/CLOCKS_PER_SEC;
   957   CkPrintf(
"PHASE3.1: %8.3f ms @ %8.3f ns/iter for %i iter\n",1000.0*(t2-t1), 1000000000.0*(t2-t1)/nops,nops);
   959   t1 = 1.0*clock()/CLOCKS_PER_SEC;
   963   float a_cut_i = 1.0/a_cut;
   964   float a_cut_i2 = a_cut_i*a_cut_i;
   965   float a_cut2 = a_cut*a_cut;
   969   float rhois2, rhojs2;
   975   for (
int ngi = minIg; ngi < maxI;  ) {
   977   for (
int i = ngi; i < ngi+iGroupSize; i++) {
   982     rhois = gbisParams->
intRad[0][2*i+1];
   983     rhois2 = rhois*rhois;
   986     fIx = fIy = fIz = 0.0;
   988     for (jj = 0; jj < numPairs; jj++) {
   996       r2 = dx*dx + dy*dy + dz*dz;
  1001       rhojs = gbisParams->
intRad[1][2*j+1];
  1002       rhojs2 = rhojs*rhojs;
  1008       logrj = log(rmrs*a_cut_i);
  1009       dhij = r_i2*(-0.25*logrj - (a_cut2 - rmrs2)*(rhojs2 + r2)*0.125*a_cut_i2*rmrsi*rmrsi);
  1015       logri = log(rmrs*a_cut_i);
  1016       dhji = r_i2*(-0.25*logri - (a_cut2 - rmrs2)*(rhois2 + r2)*0.125*a_cut_i2*rmrsi*rmrsi);
  1019       forceAlpha = -r_i*(dHdrPrefixI*dhij+dHdrPrefixJ*dhji);
  1020       fx = dx * forceAlpha;
  1021       fy = dy * forceAlpha;
  1022       fz = dz * forceAlpha;
  1024       params->
fullf[1][j].
x -= fx;
  1025       params->
fullf[1][j].
y -= fy;
  1026       params->
fullf[1][j].
z -= fz;
  1034       int id1 = params->
pExt[0][i].
id;
  1035       int id2 = params->
pExt[1][j].
id;
  1061     params->
fullf[0][i].
x += fIx;
  1062     params->
fullf[0][i].
y += fIy;
  1063     params->
fullf[0][i].
z += fIz;
  1065   for (
int s = 0; s < strideIg; s++) {
  1071   t2 = 1.0*clock()/CLOCKS_PER_SEC;
  1072   CkPrintf(
"PHASE3.2: %8.3f ms @ %8.3f ns/iter for %i iter\n",1000.0*(t2-t1), 1000000000.0*(t2-t1)/nops,nops);
  1074   t1 = 1.0*clock()/CLOCKS_PER_SEC;
  1080   for (
int ngi = minIg; ngi < maxI;  ) {
  1082   for (
int i = ngi; i < ngi+iGroupSize; i++) {
  1087     rhoi0 = gbisParams->
intRad[0][2*i+0];
  1088     rhois = gbisParams->
intRad[0][2*i+1];
  1091     fIx = fIy = fIz = 0.0;
  1093     for (jj = 0; jj < numPairs; jj++) {
  1101       r2 = dx*dx + dy*dy + dz*dz;
  1106       rhojs = gbisParams->
intRad[1][2*j+1];
  1107       rhoj0 = gbisParams->
intRad[1][2*j+0];
  1115       forceAlpha = -r_i*(dHdrPrefixI*dhij+dHdrPrefixJ*dhji); 
  1116       fx = dx * forceAlpha;
  1117       fy = dy * forceAlpha;
  1118       fz = dz * forceAlpha;
  1124       params->
fullf[1][j].
x -= fx;
  1125       params->
fullf[1][j].
y -= fy;
  1126       params->
fullf[1][j].
z -= fz;
  1130       int id1 = params->
pExt[0][i].
id;
  1131       int id2 = params->
pExt[1][j].
id;
  1166     params->
fullf[0][i].
x += fIx;
  1167     params->
fullf[0][i].
y += fIy;
  1168     params->
fullf[0][i].
z += fIz;
  1170   for (
int s = 0; s < strideIg; s++) {
  1177   t2 = 1.0*clock()/CLOCKS_PER_SEC;
  1178   CkPrintf(
"PHASE3.3: %8.3f ms @ %8.3f ns/iter for %i iter\n",1000.0*(t2-t1), 1000000000.0*(t2-t1)/nops, nops);
 
void calcGBIS(nonbonded *params, GBISParamStruct *gbisParams)
 
static void h2(float r, float r2, float ri, float rc, float r0, float rs, float &h)
 
static void CalcDHPair(float r, float r2, float ri, float rc, float ri0, float rjs, float rj0, float ris, int &dij, int &dji, float &dhij, float &dhji)
 
void nextlist(plint **list, int *list_size)
 
static void h1(float r, float r2, float ri, float rc, float r0, float rs, float &h)
 
Pairlists * gbisStepPairlists[4]
 
static void CalcHPair(float r, float r2, float ri, float rc, float ri0, float rjs, float rj0, float ris, int &dij, int &dji, float &hij, float &hji)
 
void newsize(int list_size)
 
void pairlistFromAll(nonbonded *params, GBISParamStruct *gbisParams, int minIg, int strideIg, int maxI)