11 #ifndef COMPUTENONBONDEDINL_H    12 #define COMPUTENONBONDEDINL_H    34 inline int pairlist_from_pairlist_knl(
float cutoff2,
    35                                   float p_i_x, 
float p_i_y, 
float p_i_z,
    36                                   const CompAtomFlt *
p_j,
    37                                   const plint *list, 
int list_size, 
int *newlist,
    38                                   float *r2list, 
float *xlist, 
float *ylist, 
float *zlist) {
    46   if ( list_size <= 0) 
return 0;
    50   float p_j_x, p_j_y, p_j_z;
    52 #pragma vector aligned    54   for ( g = 0 ; g < list_size; ++g ) {
    56     p_j_x = 
p_j[ gi ].position.x;
    57     p_j_y = 
p_j[ gi ].position.y;
    58     p_j_z = 
p_j[ gi ].position.z;
    67     if ( r2 <= cutoff2 ) {
    83                                   const plint *list, 
int list_size, 
int *newlist,
    86   BigReal cutoff2_delta = cutoff2 + r2_delta;
    90   if ( list_size <= 0) 
return 0;
    98 #pragma vector aligned   100   for ( g = 0 ; g < list_size; ++g ) {
   102     p_j_x = 
p_j[ gi ].position.x;
   103     p_j_y = 
p_j[ gi ].position.y;
   104     p_j_z = 
p_j[ gi ].position.z;
   107     r2 = x2 * x2 + r2_delta;
   113     if ( r2 <= cutoff2_delta ) {
   120 #ifndef SIMPLE_PAIRLIST   127   if ( list_size > 16) {
   130     int jcur1 = list[g + 1];
   131     int jcur2 = list[g + 2];
   132     int jcur3 = list[g + 3];
   136     vector4double    pj_v_0, pj_v_1, pj_v_2, pj_v_3; 
   137     vector4double    v_0, v_1, v_2, v_3;
   138     register BigReal r2_0, r2_1, r2_2, r2_3;
   140     vector4double p_i_v = {p_i_x, p_i_y, p_i_z, 0.};
   141     vector4double r2_delta_v = {r2_delta};
   148     for ( g = 4 ; g < list_size - 4; g += 4 ) {
   152       j0   =   jcur0;           j1   =   jcur1;
   153       j2   =   jcur2;           j3   =   jcur3;
   155       jcur0  =  list[g    ];    jcur1  =  list[g + 1];
   156       jcur2  =  list[g + 2];    jcur3  =  list[g + 3];
   158       __dcbt((
void*)(
p_j + jcur0));
   160       v_0 = vec_sub (p_i_v, pj_v_0);
   161       v_1 = vec_sub (p_i_v, pj_v_1);
   162       v_2 = vec_sub (p_i_v, pj_v_2);
   163       v_3 = vec_sub (p_i_v, pj_v_3);
   165       v_0 = vec_madd (v_0, v_0, r2_delta_v);
   166       v_1 = vec_madd (v_1, v_1, r2_delta_v);
   167       v_2 = vec_madd (v_2, v_2, r2_delta_v);
   168       v_3 = vec_madd (v_3, v_3, r2_delta_v);
   175       r2_0 = vec_extract(v_0, 0) + vec_extract(v_0, 1) + vec_extract(v_0, 2);
   176       r2_1 = vec_extract(v_1, 0) + vec_extract(v_1, 1) + vec_extract(v_1, 2);
   177       r2_2 = vec_extract(v_2, 0) + vec_extract(v_2, 1) + vec_extract(v_2, 2);
   178       r2_3 = vec_extract(v_3, 0) + vec_extract(v_3, 1) + vec_extract(v_3, 2);
   180       size_t test0, test1, test2, test3;
   181       size_t jout0, jout1, jout2, jout3;
   183       test0 = ( r2_0   <   cutoff2_delta );
   184       test1 = ( r2_1   <   cutoff2_delta );
   185       test2 = ( r2_2   <   cutoff2_delta );
   186       test3 = ( r2_3   <   cutoff2_delta );
   189       nli[ jout0 ]  = j0;         r2i[ jout0 ] = r2_0;
   190       jout += test0;              jout1 = jout;
   192       nli[ jout1 ]  = j1;         r2i[ jout1 ] = r2_1;
   193       jout += test1;              jout2 = jout;
   195       nli[ jout2 ]  = j2;         r2i[ jout2 ] = r2_2;
   196       jout += test2;              jout3 = jout;
   198       nli[ jout3 ]  = j3;         r2i[ jout3 ] = r2_3;
   212   if ( list_size > 16) {
   215     int jcur1 = list[g + 1];
   216     int jcur2 = list[g + 2];
   217     int jcur3 = list[g + 3];
   221     register  BigReal pj_x_0, pj_x_1, pj_x_2, pj_x_3; 
   222     register  BigReal pj_y_0, pj_y_1, pj_y_2, pj_y_3; 
   223     register  BigReal pj_z_0, pj_z_1, pj_z_2, pj_z_3; 
   225     register BigReal t_0, t_1, t_2, t_3, r2_0, r2_1, r2_2, r2_3;
   227     pj_x_0 = 
p_j[jcur0].position.x;
   228     pj_x_1 = 
p_j[jcur1].position.x;  
   229     pj_x_2 = 
p_j[jcur2].position.x;  
   230     pj_x_3 = 
p_j[jcur3].position.x;  
   231     pj_y_0 = 
p_j[jcur0].position.y; 
   232     pj_y_1 = 
p_j[jcur1].position.y;  
   233     pj_y_2 = 
p_j[jcur2].position.y;  
   234     pj_y_3 = 
p_j[jcur3].position.y;  
   235     pj_z_0 = 
p_j[jcur0].position.z; 
   236     pj_z_1 = 
p_j[jcur1].position.z;
   237     pj_z_2 = 
p_j[jcur2].position.z; 
   238     pj_z_3 = 
p_j[jcur3].position.z;
   240     for ( g = 4 ; g < list_size - 4; g += 4 ) {
   245       j0   =   jcur0;           j1   =   jcur1;
   246       j2   =   jcur2;           j3   =   jcur3;
   248       jcur0  =  list[g    ];    jcur1  =  list[g + 1];
   249       jcur2  =  list[g + 2];    jcur3  =  list[g + 3];
   252       __dcbt ((
void *) &
p_j[jcur0]);
   256       t_0   =  p_i_x - pj_x_0;   t_1   =  p_i_x - pj_x_1;
   257       t_2   =  p_i_x - pj_x_2;   t_3   =  p_i_x - pj_x_3;
   259       r2_0  =  t_0 * t_0 + r2_delta; 
   260       r2_1  =  t_1 * t_1 + r2_delta;
   261       r2_2  =  t_2 * t_2 + r2_delta;
   262       r2_3  =  t_3 * t_3 + r2_delta;
   265       t_0    =  p_i_y - pj_y_0;   t_1    =  p_i_y - pj_y_1;
   266       t_2    =  p_i_y - pj_y_2;   t_3    =  p_i_y - pj_y_3;
   267       r2_0  +=  t_0 * t_0;        r2_1  +=  t_1 * t_1;
   268       r2_2  +=  t_2 * t_2;        r2_3  +=  t_3 * t_3;
   271       t_0    =  p_i_z - pj_z_0;   t_1    =  p_i_z - pj_z_1;
   272       t_2    =  p_i_z - pj_z_2;   t_3    =  p_i_z - pj_z_3;
   273       r2_0  +=  t_0 * t_0;        r2_1  +=  t_1 * t_1;
   274       r2_2  +=  t_2 * t_2;        r2_3  +=  t_3 * t_3;
   276       pj_x_0 = 
p_j[jcur0].position.x;
   277       pj_x_1 = 
p_j[jcur1].position.x;  
   278       pj_x_2 = 
p_j[jcur2].position.x;  
   279       pj_x_3 = 
p_j[jcur3].position.x;  
   280       pj_y_0 = 
p_j[jcur0].position.y; 
   281       pj_y_1 = 
p_j[jcur1].position.y;  
   282       pj_y_2 = 
p_j[jcur2].position.y;  
   283       pj_y_3 = 
p_j[jcur3].position.y;  
   284       pj_z_0 = 
p_j[jcur0].position.z; 
   285       pj_z_1 = 
p_j[jcur1].position.z;
   286       pj_z_2 = 
p_j[jcur2].position.z; 
   287       pj_z_3 = 
p_j[jcur3].position.z;
   289       bool test0, test1, test2, test3;
   291       test0 = ( r2_0   <   cutoff2_delta );
   292       test1 = ( r2_1   <   cutoff2_delta );
   293       test2 = ( r2_2   <   cutoff2_delta );
   294       test3 = ( r2_3   <   cutoff2_delta );
   296       int jout0, jout1, jout2, jout3;
   299       nli[ jout0 ]  = j0;         r2i[ jout0 ] = r2_0;
   300       jout += test0;              jout1 = jout;
   301       nli[ jout1 ]  = j1;         r2i[ jout1 ] = r2_1;
   302       jout += test1;              jout2 = jout;
   303       nli[ jout2 ]  = j2;         r2i[ jout2 ] = r2_2;
   304       jout += test2;              jout3 = jout;
   305       nli[ jout3 ]  = j3;         r2i[ jout3 ] = r2_3;
   321   while ( g < list_size ) {
   325     BigReal r2 = t2 * t2 + r2_delta;
   326     p_j_x = 
p_j[j2].position.x;
   329     p_j_y = 
p_j[j2].position.y;
   332     p_j_z = 
p_j[j2].position.z;
   333     if ( r2 <= cutoff2_delta ) {
   341   return nli - newlist;
   352 #if NAMD_ComputeNonbonded_SortAtoms != 0   358   for (i = 0; i < seLen; i++) {
   363     register int smallestIndex = i;
   366     for (j = i + 1; j < seLen; j++) {
   368       if (currentValue < smallestValue) {
   370         smallestValue = currentValue;
   375     if (smallestIndex != i) {
   377       register SortEntry* entryB = se + smallestIndex;
   378       register unsigned int tmpIndex = entryA->
index;
   382       entryB->
index = tmpIndex;
   390   register int keepSorting = 0;
   399     for (
int i = 1; i < seLen; i++) {
   401       register SortEntry* sortEntry0 = sortEntry1;
   406       if (sortEntry0_sortValue > sortEntry1_sortValue) {
   407         register int sortEntry0_index = sortEntry0->
index;
   408         register int sortEntry1_index = sortEntry1->
index;
   409         sortEntry0->
index = sortEntry1_index;
   410         sortEntry0->
sortValue = sortEntry1_sortValue;
   411         sortEntry1->
index = sortEntry0_index;
   412         sortEntry1->
sortValue = sortEntry0_sortValue;
   417   } 
while (keepSorting != 0);  
   432   register int subListSize = 1;
   433   while (subListSize < seLen) {
   441     register int firstListOffset = 0;
   442     while (firstListOffset < seLen) {
   446       register int numElements = std::min(2 * subListSize, seLen - firstListOffset);
   447       register int list0len;
   448       register int list1len;
   449       if (numElements > subListSize) {
   450         list0len = subListSize;                
   451         list1len = numElements - subListSize;  
   453         list0len = numElements;                
   457       register SortEntry* list0ptr = srcArray + firstListOffset;
   458       register SortEntry* list1ptr = list0ptr + subListSize;
   459       register SortEntry* dstptr = dstArray + firstListOffset;
   464       while (list0len > 0 && list1len > 0) {
   469         if (sortValue0 < sortValue1) {  
   472           register int index0 = list0ptr->
index;
   474           dstptr->
index = index0;
   484           register int index1 = list1ptr->
index;
   486           dstptr->
index = index1;
   500       while (list0len > 0) {
   504         register int index0 = list0ptr->
index;
   506         dstptr->
index = index0;
   516       while (list1len > 0) {
   520         register int index1 = list1ptr->
index;
   522         dstptr->
index = index1;
   532       firstListOffset += (2 * subListSize);
   560   #define __TERNARY_ASSIGN(test, val0, val1)   ((test * val0) + ((1 - test) * val1))   567   register int subListSize = 1;
   568   while (subListSize < seLen) {
   576     register int firstListOffset = 0;
   577     while (firstListOffset < seLen) {
   583       register int numElements;
   585         register int numElements_val0 = 2 * subListSize;
   586         register int numElements_val1 = seLen - firstListOffset;
   587         register bool numElements_test = (numElements_val0 < numElements_val1);
   588         numElements = 
__TERNARY_ASSIGN(numElements_test, numElements_val0, numElements_val1);
   592       register SortEntry* dstptr = dstArray + firstListOffset;    
   593       register SortEntry* list0ptr = srcArray + firstListOffset;  
   594       register SortEntry* list1ptr = list0ptr + subListSize;      
   598         register bool lenTest = (numElements > subListSize);
   599         register int list0len_val0 = subListSize;
   600         register int list1len_val0 = numElements - subListSize;
   601         register int list0len_val1 = numElements;  
   602         register int list0len = 
__TERNARY_ASSIGN(lenTest, list0len_val0, list0len_val1);
   605         if ( ! lenTest ) list1ptr = list0ptr;
   606         list0ptr_end = list0ptr + list0len;
   607         list1ptr_end = list1ptr + list1len;
   613       firstListOffset += (2 * subListSize);
   620       register int index0 = list0ptr->
index;
   621       register int index1 = list1ptr->
index;
   626       while (list0ptr < list0ptr_end && list1ptr < list1ptr_end) {
   629         register bool test = (sortValue0 < sortValue1);
   644         index0 = list0ptr->
index;
   645         index1 = list1ptr->
index;
   653       while (list0ptr < list0ptr_end) {
   657         dstptr->
index = index0;
   663         index0 = list0ptr->
index;
   668       while (list1ptr < list1ptr_end) {
   672         dstptr->
index = index1;
   678         index1 = list1ptr->
index;
   699   #undef __TERNARY_ASSIGN   702 #endif  // NAMD_ComputeNonbonded_SortAtoms != 0   705 #endif // COMPUTENONBONDEDINL_H void sortEntries_mergeSort_v1(SortEntry *&se, SortEntry *&buf, int seLen)
 
#define __TERNARY_ASSIGN(test, val0, val1)
 
void sortEntries_selectionSort(SortEntry *const se, const int seLen)
 
int pairlist_from_pairlist(BigReal cutoff2, BigReal p_i_x, BigReal p_i_y, BigReal p_i_z, const CompAtom *p_j, const plint *list, int list_size, int *newlist, BigReal r2_delta, BigReal *r2list)
 
void sortEntries_bubbleSort(SortEntry *const se, const int seLen)
 
void sortEntries_mergeSort_v2(SortEntry *&se, SortEntry *&buf, int seLen)