14 #include "WorkDistrib.decl.h"    28 #define FLOPS(X) flops += X;    38                 int minPartition, 
int maxPartition, 
int numPartitions, 
int numPatches)
    39   : 
Compute(c), workArrays(_workArrays),
    40     minPart(minPartition), maxPart(maxPartition),
    41     strideIg(numPartitions), numParts(numPartitions),
    42     maxAtomRadius(1.9+1.4)
    92     for (
int i=0; i<8; i++) {
   169   for (
int pI = 0; pI < 8; pI++) {
   173     for (
int pJ = 0; pJ < 8; pJ++) {
   177       if (  ( gsa==1 && (jia>iia) != (idx[pJ][0]>idx[pI][0]) ) ||
   178             ( gsb==1 && (jib>iib) != (idx[pJ][1]>idx[pI][1]) ) ||
   179             ( gsc==1 && (jic>iic) != (idx[pJ][2]>idx[pI][2]) ) ||
   194   for (
int i=0; i<8; i++) {
   204   for (
int i=0; i<8; i++) {
   227   if ( 
patch[0]->flags.doNonbonded) {
   232     for (
int i=0; i<8; i++) {
   252   if ( bounds[0][0] < bounds[0][1] ) { 
   253     if (x < bounds[0][0] || x >= bounds[0][1] )
   256     if (x < bounds[0][0] && x >= bounds[0][1] )
   261   if ( bounds[1][0] < bounds[1][1] ) { 
   262     if (y < bounds[1][0] || y >= bounds[1][1] )
   265     if (y < bounds[1][0] && y >= bounds[1][1] )
   270   if ( bounds[2][0] < bounds[2][1] ) { 
   271     if (z < bounds[2][0] || z >= bounds[2][1] )
   274     if (z < bounds[2][0] && z >= bounds[2][1] )
   282   return PI*ri*(2*ri-r-(ri*ri-rj*rj)/r);
   294   Real probeRadius = 1.4f;
   295   Real cutMargin = 0.f;
   299   BigReal dxij, dyij, dzij, r2ij;
   300   BigReal dxik, dyik, dzik, r2ik;
   301   BigReal dxjk, dyjk, dzjk, r2jk;
   312   double t_start = 1.0*clock()/CLOCKS_PER_SEC;
   315   lcpoNeighborList.
reset();
   317   cut2 = 2*maxAtomRadius+cutMargin; cut2 *= cut2;
   320   for (
int pI = 0; pI < 8; pI++) {
   325     for (
int s = 0; s < 
minPart; s++) {
   331     int numAtomsInBounds = 0;
   334     for ( 
int ngi = minIg; ngi < 
numAtoms[pI]; ) {
   336       if ( isInBounds(ngir.
x, ngir.
y, ngir.
z) && 
lcpoType[pI][ngi] > 0 ) {
   337         inAtoms[numAtomsInBounds++] = ngi;
   338         ri = probeRadius+lcpoParams[ 
lcpoType[pI][ngi] ][0];
   339         maxAtomRadius = (ri > maxAtomRadius) ? ri : maxAtomRadius;
   343         for (
int pJ = 0; pJ < 8; pJ++) {
   349         int numLcpoNeighbors = 0;
   352         for (
int pJ = 0; pJ < 8; pJ++) {
   354           if (!
valid[pI][pJ]) 
continue;
   357           for ( 
int ngj = 0; ngj < 
numAtoms[pJ]; ) {
   360             dxij = ngir.
x - ngjr.
x;
   361             dyij = ngir.
y - ngjr.
y;
   362             dzij = ngir.
z - ngjr.
z;
   365             r2ij = dxij*dxij + dyij*dyij + dzij*dzij;
   367             if (r2ij < cut2 && r2ij > 0.01) {
   370               rj = probeRadius+lcpoParams[ 
lcpoType[pJ][ngj] ][0];
   372               BigReal rirjcutMargin2 = ri+rj+cutMargin;
   373               rirjcutMargin2 *= rirjcutMargin2;
   374               if (r2ij < rirjcutMargin2 && r2ij > 0.0001 &&
   376                 lcpoNeighbors[numLcpoNeighbors].
x = ngjr.
x;
   377                 lcpoNeighbors[numLcpoNeighbors].
y = ngjr.
y;
   378                 lcpoNeighbors[numLcpoNeighbors].
z = ngjr.
z;
   379                 lcpoNeighbors[numLcpoNeighbors].
r = rj;
   380                 lcpoNeighbors[numLcpoNeighbors].
f =
   384                 maxAtomRadius = (rj > maxAtomRadius) ? rj : maxAtomRadius;
   392         lcpoNeighborList.
newsize(numLcpoNeighbors);
   395       for (
int s = 0; s < strideIg; s++) {
   400     inAtomsPl.
newsize(numAtomsInBounds);
   403   double t_stop = 1.0*clock()/CLOCKS_PER_SEC;
   404   CkPrintf(
"LCPO_TIME_P %7.3f Gflops %9d @ %f\n", flops*1e-9/(t_stop-t_start),flops,(t_stop-t_start));
   408   double t_start = 1.0*clock()/CLOCKS_PER_SEC;
   414   lcpoNeighborList.
reset();
   415   cut2 = maxAtomRadius*2; cut2 *= cut2;
   428   for (
int pI = 0; pI < 8; pI++) {
   433     inAtomsPl.
nextlist( &inAtoms, &numInAtoms );
   435     for (
int i = 0; i < numInAtoms; i++) {
   436       int iIndex = inAtoms[i];
   441       const Real *lcpoParamI = lcpoParams[ 
lcpoType[pI][iIndex] ];
   442       ri = probeRadius+lcpoParamI[0];
   445       Real P1 = lcpoParamI[1];
   446       Real P2 = lcpoParamI[2];
   447       Real P3 = lcpoParamI[3];
   448       Real P4 = lcpoParamI[4];
   466       BigReal dAijdrijdxiAjkSum = 0.0;
   467       BigReal dAijdrijdyiAjkSum = 0.0;
   468       BigReal dAijdrijdziAjkSum = 0.0;
   474       int numLcpoNeighbors;
   475       lcpoNeighborList.
nextlist( &lcpoNeighbors, &numLcpoNeighbors );
   477       for (
int j = 0; j < numLcpoNeighbors; j++) {
   478         Real xj = lcpoNeighbors[j].
x;
   479         Real yj = lcpoNeighbors[j].
y;
   480         Real zj = lcpoNeighbors[j].
z;
   481         Real rj = lcpoNeighbors[j].
r;
   487         r2ij = dxij*dxij + dyij*dyij + dzij*dzij;
   489         if (r2ij >= cut2 || r2ij < 0.01) { 
continue; }
   495         if ( r2ij >= rirj2 ) { 
continue; }
   508         BigReal dAijdrij = 
PI*ri*(rij_1*rij_1*(ri*ri-rj*rj)-1);
   509         BigReal dAijdrijdxj = dAijdrij*dxij*rij_1; 
   510         BigReal dAijdrijdyj = dAijdrij*dyij*rij_1;
   511         BigReal dAijdrijdzj = dAijdrij*dzij*rij_1;
   522         for (
int k = 0; k < numLcpoNeighbors; k++) {
   523           Real xk = lcpoNeighbors[k].
x;
   524           Real yk = lcpoNeighbors[k].
y;
   525           Real zk = lcpoNeighbors[k].
z;
   526           Real rk = lcpoNeighbors[k].
r;
   532           r2ik = dxik*dxik + dyik*dyik + dzik*dzik;
   534           if (r2ik >= cut2 || r2ik < 0.01) { 
continue; }
   540           r2jk = dxjk*dxjk + dyjk*dyjk + dzjk*dzjk;
   542           if (r2jk >= cut2 || r2jk < 0.01) { 
continue; }
   548           if ( r2ik >= rirk2 ) { 
continue; }
   554           if ( r2jk >= rjrk2 ) { 
continue; }
   570           BigReal dAjkdrjk = 
PI*rj*rjk_1*(rjk_1*rjk_1*(rj*rj-rk*rk) - 1.f);
   571           BigReal dAjkdrjkdxj = -dAjkdrjk*dxjk; 
   572           BigReal dAjkdrjkdyj = -dAjkdrjk*dyjk;
   573           BigReal dAjkdrjkdzj = -dAjkdrjk*dzjk;
   574           lcpoNeighbors[k].
f->
x -= -dAjkdrjkdxj*(P3+P4*Aij)*surfTen; 
   575           lcpoNeighbors[k].
f->
y -= -dAjkdrjkdyj*(P3+P4*Aij)*surfTen;
   576           lcpoNeighbors[k].
f->
z -= -dAjkdrjkdzj*(P3+P4*Aij)*surfTen;
   578           dAjkdrjkdxjSum += dAjkdrjkdxj; 
   579           dAjkdrjkdyjSum += dAjkdrjkdyj;
   580           dAjkdrjkdzjSum += dAjkdrjkdzj;
   587         AijAjkSum += Aij*AjkjSum;
   592         BigReal lastxj = dAijdrijdxj*AjkjSum + Aij*dAjkdrjkdxjSum; 
   593         BigReal lastyj = dAijdrijdyj*AjkjSum + Aij*dAjkdrjkdyjSum;
   594         BigReal lastzj = dAijdrijdzj*AjkjSum + Aij*dAjkdrjkdzjSum;
   595         BigReal dAidxj = (P2*dAijdrijdxj + P3*dAjkdrjkdxjSum + P4*lastxj);
   596         BigReal dAidyj = (P2*dAijdrijdyj + P3*dAjkdrjkdyjSum + P4*lastyj);
   597         BigReal dAidzj = (P2*dAijdrijdzj + P3*dAjkdrjkdzjSum + P4*lastzj);
   598         lcpoNeighbors[j].
f->
x -= dAidxj*surfTen;
   599         lcpoNeighbors[j].
f->
y -= dAidyj*surfTen;
   600         lcpoNeighbors[j].
f->
z -= dAidzj*surfTen;
   603         dAijdrijdxiSum -= dAijdrijdxj; 
   604         dAijdrijdyiSum -= dAijdrijdyj;
   605         dAijdrijdziSum -= dAijdrijdzj;
   606         dAijdrijdxiAjkSum -= dAijdrijdxj*AjkjSum; 
   607         dAijdrijdyiAjkSum -= dAijdrijdyj*AjkjSum;
   608         dAijdrijdziAjkSum -= dAijdrijdzj*AjkjSum;
   615       BigReal dAidxi = (P2*dAijdrijdxiSum + P4*dAijdrijdxiAjkSum); 
   616       BigReal dAidyi = (P2*dAijdrijdyiSum + P4*dAijdrijdyiAjkSum);
   617       BigReal dAidzi = (P2*dAijdrijdziSum + P4*dAijdrijdziAjkSum);
   625       BigReal SAi = P1*S1 + P2*AijSum + P3*AjkSum + P4*AijAjkSum;
   628       totalSurfaceArea += SAi;
   633   double t_stop = 1.0*clock()/CLOCKS_PER_SEC;
   634   CkPrintf(
"LCPO_TIME_F %7.3f Gflops %9d @ %f\n", 1e-9*flops/(t_stop-t_start),flops, (t_stop-t_start));
   650 const Real ComputeLCPO::lcpoParams[23][5] = { 
   651     { 0.00, 0.0000e+00,  0.0000e+00,  0.0000e+00, 0.0000e+00 }, 
   652     { 1.70, 7.7887e-01, -2.8063e-01, -1.2968e-03, 3.9328e-04 }, 
   653     { 1.70, 5.6482e-01, -1.9608e-01, -1.0219e-03, 2.6580e-04 }, 
   654     { 1.70, 2.3348e-01, -7.2627e-02, -2.0079e-04, 7.9670e-05 }, 
   655     { 1.70, 0.0000e+00,  0.0000e+00,  0.0000e+00, 0.0000e+00 }, 
   656     { 1.70, 5.1245e-01, -1.5966e-01, -1.9781e-04, 1.6392e-04 }, 
   657     { 1.70, 7.0344e-02, -1.9015e-02, -2.2009e-05, 1.6875e-05 }, 
   658     { 1.60, 7.7914e-01, -2.5262e-01, -1.6056e-03, 3.5071e-04 }, 
   659     { 1.60, 4.9392e-01, -1.6038e-01, -1.5512e-04, 1.6453e-04 }, 
   660     { 1.60, 6.8563e-01, -1.8680e-01, -1.3557e-03, 2.3743e-04 }, 
   661     { 1.60, 8.8857e-01, -3.3421e-01, -1.8683e-03, 4.9372e-04 }, 
   662     { 1.65, 7.8602e-02, -2.9198e-01, -6.5370e-04, 3.6247e-04 }, 
   663     { 1.65, 2.2599e-01, -3.6648e-02, -1.2297e-03, 8.0038e-05 }, 
   664     { 1.65, 5.1481e-02, -1.2603e-02, -3.2006e-04, 2.4774e-05 }, 
   665     { 1.65, 7.3511e-01, -2.2116e-01, -8.9148e-04, 2.5230e-04 }, 
   666     { 1.65, 4.1102e-01, -1.2254e-01, -7.5448e-05, 1.1804e-04 }, 
   667     { 1.65, 6.2577e-02, -1.7874e-02, -8.3120e-05, 1.9849e-05 }, 
   668     { 1.90, 7.7220e-01, -2.6393e-01,  1.0629e-03, 2.1790e-04 }, 
   669     { 1.90, 5.4581e-01, -1.9477e-01, -1.2873e-03, 2.9247e-04 }, 
   670     { 1.90, 3.8650e-01, -1.8249e-01, -3.6598e-03, 4.2640e-04 }, 
   671     { 1.90, 3.8730e-02, -8.9339e-03,  8.3582e-06, 3.0381e-06 }, 
   672     { 1.80, 9.8318e-01, -4.0437e-01,  1.1249e-04, 4.9901e-04 }, 
   673     { 1.18, 4.9392e-01, -1.6038e-01, -1.5512e-04, 1.6453e-04 }  
 
void setNumPatches(int n)
 
#define PROXY_RESULTS_PRIORITY
 
Box< Patch, int > * lcpoTypeBox[8]
 
BigReal max_a(int pid) const
 
static PatchMap * Object()
 
virtual void submit(void)=0
 
SimParameters * simParameters
 
void unregisterForceDeposit(Compute *cid, Box< Patch, Results > **const box)
 
void startWork(const LDObjHandle &handle)
 
virtual void initialize()
 
SubmitReduction * willSubmit(int setID, int size=-1)
 
int index_a(int pid) const
 
static ReductionMgr * Object(void)
 
Box< Patch, int > * registerLcpoTypePickup(Compute *cid)
 
Patch * patch(PatchID pid)
 
Box< Patch, Results > * forceBox[8]
 
int gridsize_c(void) const
 
virtual void atomUpdate()
 
int gridsize_a(void) const
 
void nextlist(plint **list, int *list_size)
 
virtual void initialize()
 
BigReal min_c(int pid) const
 
int index_b(int pid) const
 
void skipWork(const LDObjHandle &handle)
 
void newsize(int list_size)
 
Box< Patch, CompAtom > * positionBox[8]
 
static LdbCoordinator * Object()
 
BigReal min_a(int pid) const
 
LCPOAtom * newlist(int max_size)
 
int index_c(int pid) const
 
ComputeLCPO(ComputeID c, PatchID pid[], int t[], ComputeNonbondedWorkArrays *_workArrays, int minPartition, int maxPartition, int numPartitions, int numPatches)
 
void unregisterPositionPickup(Compute *cid, Box< Patch, CompAtom > **const box)
 
void endWork(const LDObjHandle &handle)
 
BigReal max_b(int pid) const
 
BigReal calcOverlap(BigReal r, Real ri, Real rj)
 
void newsize(int list_size)
 
BigReal max_c(int pid) const
 
int gridsize_b(void) const
 
void nextlist(LCPOAtom **list, int *list_size)
 
plint * newlist(int max_size)
 
SubmitReduction * reduction
 
BigReal min_b(int pid) const
 
void unregisterLcpoTypePickup(Compute *cid, Box< Patch, int > **const box)
 
void close(Data **const t)
 
Box< Patch, CompAtom > * registerPositionPickup(Compute *cid)
 
#define PATCH_PRIORITY(PID)
 
CompAtomExt * getCompAtomExtInfo()
 
Box< Patch, Results > * registerForceDeposit(Compute *cid)