23 #if defined(__SSE2__) && ! defined(NAMD_DISABLE_SSE) 24 #include <emmintrin.h> 25 #if defined(__INTEL_COMPILER) 26 #define __align(X) __declspec(align(X) ) 27 #elif defined(__GNUC__) || defined(__PGI) 28 #define __align(X) __attribute__((aligned(X) )) 30 #define __align(X) __declspec(align(X) ) 35 #define UNLIKELY(X) __builtin_expect((X), 0) 37 #define UNLIKELY(X) (X) 40 #ifdef DEFINITION // ( 46 #if NAMD_ComputeNonbonded_SortAtoms != 0 54 #define NAME CLASSNAME(calc) 60 #define CLASS ComputeNonbondedPair 61 #define CLASSNAME(X) ENERGYNAME( X ## _pair ) 70 #define CLASS ComputeNonbondedSelf 71 #define CLASSNAME(X) ENERGYNAME( X ## _self ) 83 #define ENERGYNAME(X) SLOWONLYNAME( X ## _energy ) 87 #define ENERGYNAME(X) SLOWONLYNAME( X ) 98 #define SLOWONLYNAME(X) FULLDISPNAME( X ## _slow ) 102 #define SLOWONLYNAME(X) FULLDISPNAME( X ) 113 #define FULLDISPNAME(X) MERGEELECTNAME( X ## _disp ) 117 #define FULLDISPNAME(X) MERGEELECTNAME( X ) 124 #undef MERGEELECTNAME 130 #define MERGEELECTNAME(X) FULLELECTNAME( X ## _merge ) 134 #define MERGEELECTNAME(X) FULLELECTNAME( X ) 142 #define FULLELECTNAME(X) TABENERGYNAME( X ## _fullelect ) 146 #define FULLELECTNAME(X) TABENERGYNAME( X ) 156 #define TABENERGYNAME(X) FEPNAME( X ## _tabener ) 157 #define TABENERGY(X) X 158 #define NOTABENERGY(X) 160 #define TABENERGYNAME(X) FEPNAME( X ) 162 #define NOTABENERGY(X) X 182 #define FEPNAME(X) LAST( X ) 185 #define NOT_ALCHPAIR(X) X 197 #define FEPNAME(X) LAST( X ## _fep ) 205 #define FEPNAME(X) LAST( X ## _ti ) 213 #define FEPNAME(X) LAST( X ## _les ) 220 #define FEPNAME(X) LAST( X ## _int ) 227 #define FEPNAME(X) LAST( X ## _pprof ) 234 #define FEPNAME(X) LAST( X ## _go ) 250 #define KNL_MAKE_DEPENDS_INCLUDE 252 #undef KNL_MAKE_DEPENDS_INCLUDE 257 #if ( TABENERGY(1+) FEP(1+) TI(1+) INT(1+) LES(1+) GO(1+) PPROF(1+) NOFAST(1+) 0 ) 269 #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR( + 1 ) ) 270 #define COMPONENT_DOTPRODUCT(A,B) ((A##_x * B##_x) + (A##_y * B##_y) + (A##_z * B##_z)) 305 int exclChecksum = 0;
313 BigReal goEnergyNonnative = 0; ) )
350 const float offset_x_f = params->
offset_f.
x;
351 const float offset_y_f = params->
offset_f.
y;
352 const float offset_z_f = params->
offset_f.
z;
354 register const BigReal plcutoff2 = \
356 register const BigReal groupplcutoff2 = \
383 const float scaling_f =
scaling;
392 const float c1_f =
c1;
393 const float c3_f =
c3;
403 const BigReal rcut2inv = 1 / rcut2;
404 const BigReal rcut6inv = rcut2inv * rcut2inv * rcut2inv;
405 const BigReal rcut12inv = rcut6inv * rcut6inv;
407 const BigReal aRc2 = aRc * aRc;
408 const BigReal screen_rc = (1 - (1 + aRc2 + aRc2*aRc2/2)*exp(-aRc2));
416 const BigReal switchfactor = 1./(diff*diff*diff);
441 BigReal lambdaDown = 1 - alchLambda;
472 int pswitchTable[5*5];
479 for (
int ip=0; ip<5; ++ip) {
480 for (
int jp=0; jp<5; ++jp ) {
481 pswitchTable[ip+5*jp] = 0;
482 if ((ip==1 && jp==0) || (ip==0 && jp==1)) pswitchTable[ip+5*jp] = 1;
483 if ((ip==2 && jp==0) || (ip==0 && jp==2)) pswitchTable[ip+5*jp] = 2;
484 if ((ip==3 && jp==0) || (ip==0 && jp==3)) pswitchTable[ip+5*jp] = 3;
485 if ((ip==4 && jp==0) || (ip==0 && jp==4)) pswitchTable[ip+5*jp] = 4;
487 if (ip && jp && (abs(ip - jp) != 2)) pswitchTable[ip+5*jp] = 99;
491 if ((ip == 1 && jp == 1) || (ip == 1 && jp == 3) || (ip == 3 && jp == 1)) pswitchTable[ip+5*jp] = 1;
492 if ((ip == 2 && jp == 2) || (ip == 2 && jp == 4) || (ip == 4 && jp == 2)) pswitchTable[ip+5*jp] = 2;
493 if (ip == 3 && jp == 3) pswitchTable[ip+5*jp] = 3;
494 if (ip == 4 && jp == 4) pswitchTable[ip+5*jp] = 4;
502 if (ip == 1 && jp == 1) pswitchTable[ip+5*jp] = 0;
503 if (ip == 2 && jp == 2) pswitchTable[ip+5*jp] = 0;
515 BigReal fullElectEnergy_ti_2 = 0;)
520 const int i_upper = params->
numAtoms[0];
521 register const int j_upper = params->
numAtoms[1];
526 KNL(
const CompAtomFlt *pFlt_0 = params->pFlt[0]; )
527 KNL(
const CompAtomFlt *pFlt_1 = params->pFlt[1]; )
531 char * excl_flags_buff = 0;
532 const int32 * full_excl = 0;
533 const int32 * mod_excl = 0;
535 plint *pairlistn_save;
int npairn;
536 plint *pairlistx_save;
int npairx;
537 plint *pairlistm_save;
int npairm;
542 plint *pairlistnA1_save;
int npairnA1;
543 plint *pairlistxA1_save;
int npairxA1;
544 plint *pairlistmA1_save;
int npairmA1;
545 plint *pairlistnA2_save;
int npairnA2;
546 plint *pairlistxA2_save;
int npairxA2;
547 plint *pairlistmA2_save;
int npairmA2;
548 plint *pairlistnA3_save;
int npairnA3;
549 plint *pairlistxA3_save;
int npairxA3;
550 plint *pairlistmA3_save;
int npairmA3;
551 plint *pairlistnA4_save;
int npairnA4;
552 plint *pairlistxA4_save;
int npairxA4;
553 plint *pairlistmA4_save;
int npairmA4;
558 int arraysize = j_upper+5;
567 union {
double f;
int32 i[2]; } byte_order_test;
568 byte_order_test.f = 1.0;
569 int32 *r2iilist = (
int32*)r2list + ( byte_order_test.i[0] ? 0 : 1 );
571 if ( ! ( savePairlists || ! usePairlists ) ) arraysize = 0;
582 #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR( + 1 ) ) 595 register SortEntry* p_1_sortValues = atomSort_0_sortValues__;
596 register SortEntry* p_1_sortValues_fixg = atomSort_1_sortValues__;
598 int p_0_sortValues_len = 0;
599 int p_1_sortValues_len = 0;
600 int p_1_sortValues_fixg_len = 0;
604 BigReal atomSort_windowRadius = sqrt(groupplcutoff2);
606 if (savePairlists || !usePairlists) {
614 register int nbgs = p_1->nonbondedGroupSize;
615 register BigReal p_x = p_1->position.x;
616 register BigReal p_y = p_1->position.y;
617 register BigReal p_z = p_1->position.z;
618 register int index = 0;
620 for (
register int j = nbgs; j < j_upper; j += nbgs) {
624 register const CompAtom* p_j_next = p_1 + j;
641 register SortEntry* p_1_sortValStorePtr = p_1_sortValues + p_1_sortValues_len;
642 p_1_sortValStorePtr->
index = index;
643 p_1_sortValStorePtr->
sortValue = sortVal;
644 p_1_sortValues_len++;
653 register SortEntry* p_1_sortValStorePtr = p_1_sortValues + p_1_sortValues_len;
654 p_1_sortValStorePtr->
index = index;
655 p_1_sortValStorePtr->
sortValue = sortVal;
656 p_1_sortValues_len++;
661 #if 0 // Selection Sort 663 #elif 0 // Bubble Sort 666 #if NAMD_ComputeNonbonded_SortAtoms_LessBranches == 0 675 register int nbgs = p_0->nonbondedGroupSize;
676 register BigReal p_x = p_0->position.x + offset_x;
677 register BigReal p_y = p_0->position.y + offset_y;
678 register BigReal p_z = p_0->position.z + offset_z;
679 register int index = 0;
681 for (
register int i = nbgs; i < i_upper; i += nbgs) {
685 register const CompAtom* p_i_next = p_0 + i;
697 register BigReal* p_0_sortValStorePtr = p_0_sortValues + index;
698 *p_0_sortValStorePtr = sortVal;
706 register BigReal* p_0_sortValStorePtr = p_0_sortValues + index;
707 *p_0_sortValStorePtr = sortVal;
709 p_0_sortValues_len = i_upper;
714 #endif // NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR( + 1 ) ) 718 #if ! (NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR( + 1 ) ) ) 746 if ( savePairlists || ! usePairlists ) {
748 #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) ) 751 register int fixg = 0;
752 for (
int tmpI = 0; tmpI < p_1_sortValues_len; tmpI++) {
753 register SortEntry* p_1_sortEntry = p_1_sortValues + tmpI;
754 register int p_1_index = p_1_sortEntry->
index;
755 if (!pExt_1[p_1_index].groupFixed) {
756 register SortEntry* p_1_sortEntry_fixg = p_1_sortValues_fixg + p_1_sortValues_fixg_len;
757 p_1_sortEntry_fixg->
index = p_1_sortEntry->
index;
759 p_1_sortValues_fixg_len++;
766 for ( j = 0; j < j_upper; ++j ) {
767 if ( p_1[j].nonbondedGroupSize ) {
772 if ( g_upper ) grouplist[g_upper] = grouplist[g_upper-1];
776 for ( g = 0; g < g_upper; ++g ) {
778 if ( ! pExt_1[j].groupFixed ) {
779 fixglist[fixg++] = j;
785 if ( fixg_upper ) fixglist[fixg_upper] = fixglist[fixg_upper-1];
787 #endif // NAMD_ComputeNonbonded_SortAtoms != 0 795 NAMD_bug(
"pairlist i_upper mismatch!");
805 int pairlistoffset=0;
809 #if ( SHORT( FAST( 1+ ) ) 0 ) 822 #define fullf_1 fullf_0 828 int groupCount = params->
minPart;
830 if ( savePairlists || ! usePairlists ) {
837 PAIR(
for ( ; i < (i_upper);))
SELF(
for ( i=0; i < (i_upper- 1);i++))
840 KNL(
const CompAtomFlt &pFlt_i = pFlt_0[i]; )
843 PAIR(
if (savePairlists || ! usePairlists){)
849 __dcbt((
void *) &(p_0[i]));
854 groupCount = maxPart;
863 register const BigReal p_i_x_f = pFlt_i.position.x + offset_x_f;
864 register const BigReal p_i_y_f = pFlt_i.position.y + offset_y_f;
865 register const BigReal p_i_z_f = pFlt_i.position.z + offset_z_f;
878 if ( savePairlists || ! usePairlists ) {
880 #ifdef MEM_OPT_VERSION 882 const int excl_min = pExt_i.id + exclcheck->
min;
883 const int excl_max = pExt_i.id + exclcheck->
max;
886 const int excl_min = exclcheck->
min;
887 const int excl_max = exclcheck->
max;
889 const char * excl_flags_var;
890 if ( exclcheck->
flags ) excl_flags_var = exclcheck->
flags - excl_min;
895 #ifndef MEM_OPT_VERSION 896 #define REZERO_EXCL_FLAGS_BUFF if ( excl_flags_buff ) { \ 898 nl = full_excl[0] + 1; \ 899 for ( l=1; l<nl; ++l ) excl_flags_buff[full_excl[l]] = 0; \ 900 nl = mod_excl[0] + 1; \ 901 for ( l=1; l<nl; ++l ) excl_flags_buff[mod_excl[l]] = 0; \ 906 int oldsize = wa.
size();
908 memset( (
void*) &wa[oldsize], 0, wa.
size() - oldsize);
910 excl_flags_buff = &wa[0];
914 nl = full_excl[0] + 1;
915 for ( l=1; l<nl; ++l ) excl_flags_buff[full_excl[l]] =
EXCHCK_FULL;
917 nl = mod_excl[0] + 1;
918 for ( l=1; l<nl; ++l ) excl_flags_buff[mod_excl[l]] =
EXCHCK_MOD;
919 excl_flags_var = excl_flags_buff;
923 const char *
const excl_flags = excl_flags_var;
929 const int groupfixed = (
fixedAtomsOn && pExt_i.groupFixed );
943 while ( g_lower < g_upper &&
944 grouplist[g_lower] < j_hgroup ) ++g_lower;
945 while ( fixg_lower < fixg_upper &&
946 fixglist[fixg_lower] < j_hgroup ) ++fixg_lower;
949 for ( j = i + 1; j < j_hgroup; ++j ) {
950 pairlist[pairlistindex++] = j;
955 register int *pli = pairlist + pairlistindex;
960 #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) ) 963 register BigReal p_i_sortValue = p_0_sortValues[i];
964 const BigReal p_i_sortValue_plus_windowRadius = p_i_sortValue + atomSort_windowRadius;
965 register SortEntry* sortValues = ( groupfixed ? p_1_sortValues_fixg : p_1_sortValues );
968 register int lower = 0;
969 register int upper = (groupfixed ? p_1_sortValues_fixg_len : p_1_sortValues_len);
970 while ((upper - lower) > 1) {
971 register int j = ((lower + upper) >> 1);
973 if (jSortVal < p_i_sortValue_plus_windowRadius) {
979 const int gu = (sortValues[lower].
sortValue >= p_i_sortValue_plus_windowRadius) ? lower : upper;
983 register int *gli = goodglist;
984 const int *glist = ( groupfixed ? fixglist : grouplist );
985 SELF(
const int gl = ( groupfixed ? fixg_lower : g_lower ); )
986 const int gu = ( groupfixed ? fixg_upper : g_upper );
995 #if defined(__SSE2__) && ! defined(NAMD_DISABLE_SSE) 998 #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) ) 999 register SortEntry* sortEntry0 = sortValues + g;
1000 register SortEntry* sortEntry1 = sortValues + g + 1;
1001 register int jprev0 = sortEntry0->
index;
1002 register int jprev1 = sortEntry1->
index;
1004 register int jprev0 = glist[g ];
1005 register int jprev1 = glist[g + 1];
1011 __m128d PJ_X_01 = _mm_set_pd(p_1[jprev1].position.x, p_1[jprev0].position.x);
1012 __m128d PJ_Y_01 = _mm_set_pd(p_1[jprev1].position.y, p_1[jprev0].position.y);
1013 __m128d PJ_Z_01 = _mm_set_pd(p_1[jprev1].position.z, p_1[jprev0].position.z);
1016 const __m128d P_I_X = _mm_set1_pd(p_i_x);
1017 const __m128d P_I_Y = _mm_set1_pd(p_i_y);
1018 const __m128d P_I_Z = _mm_set1_pd(p_i_z);
1021 for ( ; g < gu - 2; g +=2 ) {
1026 __m128d T_01 = _mm_sub_pd(P_I_X, PJ_X_01);
1027 __m128d R2_01 = _mm_mul_pd(T_01, T_01);
1028 T_01 = _mm_sub_pd(P_I_Y, PJ_Y_01);
1029 R2_01 = _mm_add_pd(R2_01, _mm_mul_pd(T_01, T_01));
1030 T_01 = _mm_sub_pd(P_I_Z, PJ_Z_01);
1031 R2_01 = _mm_add_pd(R2_01, _mm_mul_pd(T_01, T_01));
1033 #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) ) 1034 sortEntry0 = sortValues + g;
1035 sortEntry1 = sortValues + g + 1;
1036 jprev0 = sortEntry0->
index;
1037 jprev1 = sortEntry1->
index;
1040 jprev1 = glist[g+1];
1043 PJ_X_01 = _mm_set_pd(p_1[jprev1].position.x, p_1[jprev0].position.x);
1044 PJ_Y_01 = _mm_set_pd(p_1[jprev1].position.y, p_1[jprev0].position.y);
1045 PJ_Z_01 = _mm_set_pd(p_1[jprev1].position.z, p_1[jprev0].position.z);
1047 __align(16)
double r2_01[2];
1048 _mm_store_pd(r2_01, R2_01);
1051 bool test0 = ( r2_01[0] < groupplcutoff2 );
1052 bool test1 = ( r2_01[1] < groupplcutoff2 );
1056 goodglist [ hu ] = j0;
1057 goodglist [ hu + test0 ] = j1;
1059 hu += test0 + test1;
1066 #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) ) 1067 register SortEntry* sortEntry0 = sortValues + g;
1068 register SortEntry* sortEntry1 = sortValues + g + 1;
1069 register int jprev0 = sortEntry0->
index;
1070 register int jprev1 = sortEntry1->
index;
1072 register int jprev0 = glist[g ];
1073 register int jprev1 = glist[g + 1];
1079 register BigReal pj_x_0, pj_x_1;
1080 register BigReal pj_y_0, pj_y_1;
1081 register BigReal pj_z_0, pj_z_1;
1082 register BigReal t_0, t_1, r2_0, r2_1;
1084 pj_x_0 = p_1[jprev0].position.x;
1085 pj_x_1 = p_1[jprev1].position.x;
1087 pj_y_0 = p_1[jprev0].position.y;
1088 pj_y_1 = p_1[jprev1].position.y;
1090 pj_z_0 = p_1[jprev0].position.z;
1091 pj_z_1 = p_1[jprev1].position.z;
1094 for ( ; g < gu - 2; g +=2 ) {
1099 t_0 = p_i_x - pj_x_0;
1100 t_1 = p_i_x - pj_x_1;
1104 t_0 = p_i_y - pj_y_0;
1105 t_1 = p_i_y - pj_y_1;
1109 t_0 = p_i_z - pj_z_0;
1110 t_1 = p_i_z - pj_z_1;
1114 #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) ) 1115 sortEntry0 = sortValues + g;
1116 sortEntry1 = sortValues + g + 1;
1117 jprev0 = sortEntry0->
index;
1118 jprev1 = sortEntry1->
index;
1121 jprev1 = glist[g+1];
1124 pj_x_0 = p_1[jprev0].position.x;
1125 pj_x_1 = p_1[jprev1].position.x;
1126 pj_y_0 = p_1[jprev0].position.y;
1127 pj_y_1 = p_1[jprev1].position.y;
1128 pj_z_0 = p_1[jprev0].position.z;
1129 pj_z_1 = p_1[jprev1].position.z;
1131 bool test0 = ( r2_0 < groupplcutoff2 );
1132 bool test1 = ( r2_1 < groupplcutoff2 );
1136 goodglist [ hu ] = j0;
1137 goodglist [ hu + test0 ] = j1;
1139 hu += test0 + test1;
1147 #pragma omp simd simdlen(16) 1148 for (g = gp; g < gu; g++) {
1150 #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) ) 1151 register SortEntry* sortEntry = sortValues + g;
1152 register int j = sortEntry->
index;
1157 BigReal p_j_x = p_1[j].position.x;
1158 BigReal p_j_y = p_1[j].position.y;
1159 BigReal p_j_z = p_1[j].position.z;
1168 #pragma omp ordered simd monotonic(hu:1) 1169 if ( r2 <= groupplcutoff2 )
1170 goodglist[hu ++] = j;
1173 for (
int h=0; h<hu; ++h ) {
1174 int j = goodglist[h];
1175 int nbgs = p_1[j].nonbondedGroupSize;
1188 pairlistindex = pli - pairlist;
1190 if ( pairlistindex ) {
1191 pairlist[pairlistindex] = pairlist[pairlistindex-1];
1201 else pairlistoffset++;
1204 const int atomfixed = (
fixedAtomsOn && pExt_i.atomFixed );
1206 register int *pli = pairlist2;
1209 register plint *plin = pairlistn;
1216 for (
int k=pairlistoffset; k<pairlistindex; k++) {
1222 if (qmGroup_i > 0) {
1223 if (qmGroup_i == qmGroup_j) {
1233 int npair2_int = pli - pairlist2;
1235 for (
int k=0; k<npair2_int; k++) {
1239 BigReal p_j_x = p_1[j].position.x;
1242 BigReal p_j_y = p_1[j].position.y;
1245 BigReal p_j_z = p_1[j].position.z;
1249 if ( ( ! (atomfixed && pExt_1[j].atomFixed) ) && (r2 <= plcutoff2) ) {
1250 int atom2 = pExt_1[j].
id;
1251 if ( atom2 >= excl_min && atom2 <= excl_max ) *(pli++) = j;
1263 if (ifep_type != 1) {
PAIR(++i;)
continue; }
1264 for (
int k=pairlistoffset; k<pairlistindex; k++) {
1266 const int jfep_type = p_1[j].partition;
1268 if (jfep_type == 1) {
1273 if (ifep_type != 1 && ifep_type != 2) {
PAIR(++i;)
continue; }
1274 for (
int k=pairlistoffset; k<pairlistindex; k++) {
1276 const int jfep_type = p_1[j].partition;
1278 if (ifep_type + jfep_type == 3) {
1283 int npair2_int = pli - pairlist2;
1285 for (
int k=0; k<npair2_int; k++) {
1287 BigReal p_j_x = p_1[j].position.x;
1290 BigReal p_j_y = p_1[j].position.y;
1293 BigReal p_j_z = p_1[j].position.z;
1296 if ( ( ! (atomfixed && pExt_1[j].atomFixed) ) && (r2 <= plcutoff2) ) {
1297 int atom2 = pExt_1[j].
id;
1298 if ( atom2 >= excl_min && atom2 <= excl_max ) *(pli++) = j;
1305 for (
int k=pairlistoffset; k<pairlistindex; k++) {
1307 BigReal p_j_x = p_1[j].position.x;
1310 BigReal p_j_y = p_1[j].position.y;
1313 BigReal p_j_z = p_1[j].position.z;
1316 if ( (! pExt_1[j].atomFixed) && (r2 <= plcutoff2) ) {
1317 int atom2 = pExt_1[j].
id;
1318 if ( atom2 >= excl_min && atom2 <= excl_max ) *(pli++) = j;
1323 int k = pairlistoffset;
1324 int ku = pairlistindex;
1327 #if defined(__SSE2__) && ! defined(NAMD_DISABLE_SSE) 1329 register int jprev0 = pairlist [k ];
1330 register int jprev1 = pairlist [k + 1];
1335 __m128d PJ_X_01 = _mm_set_pd(p_1[jprev1].position.x, p_1[jprev0].position.x);
1336 __m128d PJ_Y_01 = _mm_set_pd(p_1[jprev1].position.y, p_1[jprev0].position.y);
1337 __m128d PJ_Z_01 = _mm_set_pd(p_1[jprev1].position.z, p_1[jprev0].position.z);
1340 const __m128d P_I_X = _mm_set1_pd(p_i_x);
1341 const __m128d P_I_Y = _mm_set1_pd(p_i_y);
1342 const __m128d P_I_Z = _mm_set1_pd(p_i_z);
1344 int atom2_0 = pExt_1[jprev0].
id;
1345 int atom2_1 = pExt_1[jprev1].
id;
1348 for ( ; k < ku - 2; k +=2 ) {
1353 __m128d T_01 = _mm_sub_pd(P_I_X, PJ_X_01);
1354 __m128d R2_01 = _mm_mul_pd(T_01, T_01);
1355 T_01 = _mm_sub_pd(P_I_Y, PJ_Y_01);
1356 R2_01 = _mm_add_pd(R2_01, _mm_mul_pd(T_01, T_01));
1357 T_01 = _mm_sub_pd(P_I_Z, PJ_Z_01);
1358 R2_01 = _mm_add_pd(R2_01, _mm_mul_pd(T_01, T_01));
1360 jprev0 = pairlist[k];
1361 jprev1 = pairlist[k+1];
1363 PJ_X_01 = _mm_set_pd(p_1[jprev1].position.x, p_1[jprev0].position.x);
1364 PJ_Y_01 = _mm_set_pd(p_1[jprev1].position.y, p_1[jprev0].position.y);
1365 PJ_Z_01 = _mm_set_pd(p_1[jprev1].position.z, p_1[jprev0].position.z);
1367 __align(16)
double r2_01[2];
1368 _mm_store_pd(r2_01, R2_01);
1370 if (r2_01[0] <= plcutoff2) {
1371 if (
UNLIKELY( atom2_0 >= excl_min && atom2_0 <= excl_max ))
1376 atom2_0 = pExt_1[jprev0].
id;
1378 if (r2_01[1] <= plcutoff2) {
1379 if (
UNLIKELY( atom2_1 >= excl_min && atom2_1 <= excl_max ))
1384 atom2_1 = pExt_1[jprev1].
id;
1390 register int jprev0 = pairlist [k];
1391 register int jprev1 = pairlist [k + 1];
1396 register BigReal pj_x_0, pj_x_1;
1397 register BigReal pj_y_0, pj_y_1;
1398 register BigReal pj_z_0, pj_z_1;
1399 register BigReal t_0, t_1, r2_0, r2_1;
1401 pj_x_0 = p_1[jprev0].position.x;
1402 pj_x_1 = p_1[jprev1].position.x;
1404 pj_y_0 = p_1[jprev0].position.y;
1405 pj_y_1 = p_1[jprev1].position.y;
1407 pj_z_0 = p_1[jprev0].position.z;
1408 pj_z_1 = p_1[jprev1].position.z;
1410 int atom2_0 = pExt_1[jprev0].
id;
1411 int atom2_1 = pExt_1[jprev1].
id;
1414 for ( ; k < ku - 2; k +=2 ) {
1419 t_0 = p_i_x - pj_x_0;
1420 t_1 = p_i_x - pj_x_1;
1424 t_0 = p_i_y - pj_y_0;
1425 t_1 = p_i_y - pj_y_1;
1429 t_0 = p_i_z - pj_z_0;
1430 t_1 = p_i_z - pj_z_1;
1434 jprev0 = pairlist[k];
1435 jprev1 = pairlist[k+1];
1437 pj_x_0 = p_1[jprev0].position.x;
1438 pj_x_1 = p_1[jprev1].position.x;
1439 pj_y_0 = p_1[jprev0].position.y;
1440 pj_y_1 = p_1[jprev1].position.y;
1441 pj_z_0 = p_1[jprev0].position.z;
1442 pj_z_1 = p_1[jprev1].position.z;
1444 if (r2_0 <= plcutoff2) {
1445 if ( atom2_0 >= excl_min && atom2_0 <= excl_max )
1450 atom2_0 = pExt_1[jprev0].
id;
1452 if (r2_1 <= plcutoff2) {
1453 if ( atom2_1 >= excl_min && atom2_1 <= excl_max )
1458 atom2_1 = pExt_1[jprev1].
id;
1466 #pragma omp simd simdlen(16) 1467 for (k = kp; k < ku; k++) {
1468 int j = pairlist[k];
1469 int atom2 = pExt_1[j].
id;
1471 BigReal p_j_x = p_1[j].position.x;
1472 BigReal p_j_y = p_1[j].position.y;
1473 BigReal p_j_z = p_1[j].position.z;
1482 #pragma omp ordered simd monotonic(plin:1, pli:1) 1483 if (r2 <= plcutoff2) {
1484 if ( atom2 >= excl_min && atom2 <= excl_max )
1494 if ( plin == pairlistn && pli == pairlist2 ) {
1501 plint *plix = pairlistx;
1502 plint *plim = pairlistm;
1504 plint *plinA1 = pairlistnA1;
1505 plint *plixA1 = pairlistxA1;
1506 plint *plimA1 = pairlistmA1;
1507 plint *plinA2 = pairlistnA2;
1508 plint *plixA2 = pairlistxA2;
1509 plint *plimA2 = pairlistmA2;
1510 plint *plinA3 = pairlistnA3;
1511 plint *plixA3 = pairlistxA3;
1512 plint *plimA3 = pairlistmA3;
1513 plint *plinA4 = pairlistnA4;
1514 plint *plixA4 = pairlistxA4;
1515 plint *plimA4 = pairlistmA4;
1521 int unsortedNpairn = plin - pairlistn;
1523 for ( k=0; k<unsortedNpairn; ++k ) {
1524 int j = pairlistn[k];
1525 switch(pswitchTable[p_i_partition + 5*(p_1[j].
partition)]) {
1526 case 0: *(plin++) = j;
break;
1527 case 1: *(plinA1++) = j;
break;
1528 case 2: *(plinA2++) = j;
break;
1529 case 3: *(plinA3++) = j;
break;
1530 case 4: *(plinA4++) = j;
break;
1535 int npair2 = pli - pairlist2;
1538 for (k=0; k < npair2; ++k ) {
1539 int j = pairlist2[k];
1540 int atom2 = pExt_1[j].
id;
1541 int excl_flag = excl_flags[atom2];
1542 ALCH(
int pswitch = pswitchTable[p_i_partition + 5*(p_1[j].
partition)];)
1543 switch ( excl_flag
ALCH( + 3 * pswitch)) {
1544 case 0: *(plin++) = j;
break;
1545 case 1: *(plix++) = j;
break;
1546 case 2: *(plim++) = j;
break;
1548 case 3: *(plinA1++) = j;
break;
1549 case 6: *(plinA2++) = j;
break;
1550 case 9: *(plinA3++) = j;
break;
1551 case 12: *(plinA4++) = j;
break;
1552 case 5: *(plimA1++) = j;
break;
1553 case 8: *(plimA2++) = j;
break;
1554 case 11: *(plimA3++) = j;
break;
1555 case 14: *(plimA4++) = j;
break;
1556 case 4: *(plixA1++) = j;
break;
1557 case 7: *(plixA2++) = j;
break;
1558 case 10: *(plixA3++) = j;
break;
1559 case 13: *(plixA4++) = j;
break;
1564 npairn = plin - pairlistn;
1565 pairlistn_save = pairlistn;
1566 pairlistn_save[npairn] = npairn ? pairlistn_save[npairn-1] : -1;
1567 pairlists.
newsize(npairn + 1);
1569 npairx = plix - pairlistx;
1570 pairlistx_save = pairlists.
newlist();
1571 for ( k=0; k<npairx; ++k ) {
1572 pairlistx_save[k] = pairlistx[k];
1574 pairlistx_save[k] = k ? pairlistx_save[k-1] : -1;
1575 pairlists.
newsize(npairx + 1);
1577 npairm = plim - pairlistm;
1578 pairlistm_save = pairlists.
newlist();
1579 for ( k=0; k<npairm; ++k ) {
1580 pairlistm_save[k] = pairlistm[k];
1582 pairlistm_save[k] = k ? pairlistm_save[k-1] : -1;
1583 pairlists.
newsize(npairm + 1);
1587 #define PAIRLISTFROMARRAY(NPAIRS,PL1,PL2,PLSAVE) \ 1589 (NPAIRS) = (PL2) - (PL1); \ 1590 (PLSAVE) = pairlists.newlist(); \ 1591 for ( k=0; k<(NPAIRS); ++k ) { \ 1592 (PLSAVE)[k] = (PL1)[k]; \ 1594 (PLSAVE)[k] = k ? (PLSAVE)[k-1] : -1; \ 1595 pairlists.newsize((NPAIRS) + 1); \ 1598 PAIRLISTFROMARRAY(npairnA1,pairlistnA1,plinA1,pairlistnA1_save);
1599 PAIRLISTFROMARRAY(npairxA1,pairlistxA1,plixA1,pairlistxA1_save);
1600 PAIRLISTFROMARRAY(npairmA1,pairlistmA1,plimA1,pairlistmA1_save);
1601 PAIRLISTFROMARRAY(npairnA2,pairlistnA2,plinA2,pairlistnA2_save);
1602 PAIRLISTFROMARRAY(npairxA2,pairlistxA2,plixA2,pairlistxA2_save);
1603 PAIRLISTFROMARRAY(npairmA2,pairlistmA2,plimA2,pairlistmA2_save);
1604 PAIRLISTFROMARRAY(npairnA3,pairlistnA3,plinA3,pairlistnA3_save);
1605 PAIRLISTFROMARRAY(npairxA3,pairlistxA3,plixA3,pairlistxA3_save);
1606 PAIRLISTFROMARRAY(npairmA3,pairlistmA3,plimA3,pairlistmA3_save);
1607 PAIRLISTFROMARRAY(npairnA4,pairlistnA4,plinA4,pairlistnA4_save);
1608 PAIRLISTFROMARRAY(npairxA4,pairlistxA4,plixA4,pairlistxA4_save);
1609 PAIRLISTFROMARRAY(npairmA4,pairlistmA4,plimA4,pairlistmA4_save);
1610 #undef PAIRLISTFROMARRAY 1614 if ( ! savePairlists ) pairlists.
reset();
1621 pairlists.
nextlist(&pairlistn_save,&npairn); --npairn;
1622 pairlists.
nextlist(&pairlistx_save,&npairx); --npairx;
1623 pairlists.
nextlist(&pairlistm_save,&npairm); --npairm;
1625 pairlists.
nextlist(&pairlistnA1_save,&npairnA1); --npairnA1;
1626 pairlists.
nextlist(&pairlistxA1_save,&npairxA1); --npairxA1;
1627 pairlists.
nextlist(&pairlistmA1_save,&npairmA1); --npairmA1;
1628 pairlists.
nextlist(&pairlistnA2_save,&npairnA2); --npairnA2;
1629 pairlists.
nextlist(&pairlistxA2_save,&npairxA2); --npairxA2;
1630 pairlists.
nextlist(&pairlistmA2_save,&npairmA2); --npairmA2;
1631 pairlists.
nextlist(&pairlistnA3_save,&npairnA3); --npairnA3;
1632 pairlists.
nextlist(&pairlistxA3_save,&npairxA3); --npairxA3;
1633 pairlists.
nextlist(&pairlistmA3_save,&npairmA3); --npairmA3;
1634 pairlists.
nextlist(&pairlistnA4_save,&npairnA4); --npairnA4;
1635 pairlists.
nextlist(&pairlistxA4_save,&npairxA4); --npairxA4;
1636 pairlists.
nextlist(&pairlistmA4_save,&npairmA4); --npairmA4;
1647 ( ( p_i.
partition == 1 ) ? 1. : -1. ) : 0.
1668 const float kq_i_f = kq_i;
1671 p_i_x_f, p_i_y_f, p_i_z_f, pFlt_1, pairlistn_save, npairn, pairlisti,
1672 r2list_f, xlist, ylist, zlist);
1675 p_i_x, p_i_y, p_i_z, p_1, pairlistn_save, npairn, pairlisti,
1690 KNL(
float drudeNbtholeCut2 = (drudeNbtholeCut * drudeNbtholeCut); )
1694 for (k = 0; k < npairi; k++) {
1695 NOKNL(
if (r2list[k] > drudeNbtholeCut2) {
continue; } )
1696 KNL(
if (r2list_f[k] > drudeNbtholeCut2) {
continue; } )
1698 const int j = pairlisti[k];
1701 if (
p_j.hydrogenGroupSize < 2 ) {
continue; }
1703 for (kk = 0;kk < NumNbtholePairParams; kk++) {
1705 if (((nbthole_array[kk].ind1 == p_i.
vdwType) &&
1706 (nbthole_array[kk].
ind2 ==
p_j.vdwType)) ||
1708 (nbthole_array[kk].
ind1 ==
p_j.vdwType))) {
1712 if ( kk < NumNbtholePairParams ) {
1715 const BigReal aa = nbthole_array[kk].
tholeij * powf(aprod, -1.f/6);
1716 const BigReal qqaa = CC * p_0[i].charge * p_1[j].charge;
1717 const BigReal qqad = CC * p_0[i].charge * p_1[j+1].charge;
1718 const BigReal qqda = CC * p_0[i+1].charge * p_1[j].charge;
1719 const BigReal qqdd = CC * p_0[i+1].charge * p_1[j+1].charge;
1721 Vector raa = (p_0[i].position + params->
offset) - p_1[j].position;
1722 Vector rad = (p_0[i].position + params->
offset) - p_1[j+1].position;
1723 Vector rda = (p_0[i+1].position + params->
offset) - p_1[j].position;
1724 Vector rdd = (p_0[i+1].position + params->
offset) - p_1[j+1].position;
1731 BigReal auaa = aa / raa_invlen;
1732 BigReal auad = aa / rad_invlen;
1733 BigReal auda = aa / rda_invlen;
1734 BigReal audd = aa / rdd_invlen;
1741 BigReal polyauaa = 1 + 0.5*auaa;
1742 BigReal polyauad = 1 + 0.5*auad;
1743 BigReal polyauda = 1 + 0.5*auda;
1744 BigReal polyaudd = 1 + 0.5*audd;
1747 ethole += qqaa * raa_invlen * (- polyauaa * expauaa);
1748 ethole += qqad * rad_invlen * (- polyauad * expauad);
1749 ethole += qqda * rda_invlen * (- polyauda * expauda);
1750 ethole += qqdd * rdd_invlen * (- polyaudd * expaudd);
1752 polyauaa = 1 + auaa*polyauaa;
1753 polyauad = 1 + auad*polyauad;
1754 polyauda = 1 + auda*polyauda;
1755 polyaudd = 1 + audd*polyaudd;
1757 BigReal raa_invlen3 = raa_invlen * raa_invlen * raa_invlen;
1758 BigReal rad_invlen3 = rad_invlen * rad_invlen * rad_invlen;
1759 BigReal rda_invlen3 = rda_invlen * rda_invlen * rda_invlen;
1760 BigReal rdd_invlen3 = rdd_invlen * rdd_invlen * rdd_invlen;
1762 BigReal dfaa = qqaa * raa_invlen3 * (polyauaa*expauaa);
1763 BigReal dfad = qqad * rad_invlen3 * (polyauad*expauad);
1764 BigReal dfda = qqda * rda_invlen3 * (polyauda*expauda);
1765 BigReal dfdd = qqdd * rdd_invlen3 * (polyaudd*expaudd);
1767 Vector faa = -dfaa * raa;
1768 Vector fad = -dfad * rad;
1769 Vector fda = -dfda * rda;
1770 Vector fdd = -dfdd * rdd;
1772 SHORT(f_net)
NOSHORT(fullf_net) += (faa + fad) + (fda + fdd);
1773 params->
ff[0][i] += faa + fad;
1774 params->
ff[0][i+1] += fda + fdd;
1775 params->
ff[1][j] -= faa + fda;
1776 params->
ff[1][j+1] -= fad + fdd;
1790 KNL(
float loweAndersenCutoff2 = (loweAndersenCutoff * loweAndersenCutoff); )
1804 int atom_i = pExt_0[i].
id;
1806 if (loweAndersenUseCOMvelocity) {
1812 pos += v_0[i+l].
charge * p_0[i+l].position;
1823 for (k = 0; k < npairi; k++) {
1824 NOKNL(
if (r2list[k] > loweAndersenCutoff2) {
continue; } )
1825 KNL(
if (r2list_f[k] > loweAndersenCutoff2) {
continue; } )
1827 const int j = pairlisti[k];
1831 if (!
p_j.hydrogenGroupSize) {
continue; }
1832 if (rand->
uniform() > loweAndersenProb) {
continue; }
1834 Mass mass_j = v_j.charge;
1837 int atom_j = pExt_1[j].
id;
1839 if (loweAndersenUseCOMvelocity) {
1843 for (
int l = 0; l <
p_j.hydrogenGroupSize; l++) {
1845 pos += v_1[j+l].
charge * p_1[j+l].position;
1855 Mass mu_ij = (mass_i * mass_j)/(mass_i + mass_j);
1860 Force force = mu_ij * dt_inv * (lambda - (deltaV * sigma_ij)) * sigma_ij;
1865 if (loweAndersenUseCOMvelocity) {
1866 BigReal inv_mass_i = 1.0 / mass_i;
1867 BigReal inv_mass_j = 1.0 / mass_j;
1869 params->
ff[0][i+l] += (v_0[i+l].
charge * inv_mass_i) * force;
1871 for (
int l = 0; l <
p_j.hydrogenGroupSize; l++) {
1872 params->
ff[1][j+l] -= (v_1[j+l].
charge * inv_mass_j) * force;
1875 params->
ff[0][i] += force;
1876 params->
ff[1][j] -= force;
1892 #define VDW_SWITCH_MODE VDW_SWITCH_MODE_ENERGY 1893 case VDW_SWITCH_MODE:
1896 #undef VDW_SWITCH_MODE 1898 #define VDW_SWITCH_MODE VDW_SWITCH_MODE_MARTINI 1899 case VDW_SWITCH_MODE:
1902 #undef VDW_SWITCH_MODE 1904 #define VDW_SWITCH_MODE VDW_SWITCH_MODE_FORCE 1905 case VDW_SWITCH_MODE:
1908 #undef VDW_SWITCH_MODE 1922 p_i_x, p_i_y, p_i_z, p_1, pairlistm_save, npairm, pairlisti,
1924 exclChecksum += npairi;
1929 #define MODIFIED(X) X 1937 p_i_x, p_i_y, p_i_z, p_1, pairlistx_save, npairx, pairlisti,
1939 exclChecksum += npairi;
1945 #define EXCLUDED(X) X 1958 exclChecksum += npairx;
1961 #if 0 ALCH(+1) // nonbondedbase2 for alchemical-separeted pairlists 1964 #define ALCHPAIR(X) X 1966 #define NOT_ALCHPAIR(X) 1988 p_i_x, p_i_y, p_i_z, p_1, pairlistnA1_save, npairnA1, pairlisti,
2001 p_i_x, p_i_y, p_i_z, p_1, pairlistnA2_save, npairnA2, pairlisti,
2014 p_i_x, p_i_y, p_i_z, p_1, pairlistnA3_save, npairnA3, pairlisti,
2027 p_i_x, p_i_y, p_i_z, p_1, pairlistnA4_save, npairnA4, pairlisti,
2043 #define MODIFIED(X) X 2050 p_i_x, p_i_y, p_i_z, p_1, pairlistmA1_save, npairmA1, pairlisti,
2052 exclChecksum += npairi;
2064 p_i_x, p_i_y, p_i_z, p_1, pairlistmA2_save, npairmA2, pairlisti,
2066 exclChecksum += npairi;
2078 p_i_x, p_i_y, p_i_z, p_1, pairlistmA3_save, npairmA3, pairlisti,
2080 exclChecksum += npairi;
2092 p_i_x, p_i_y, p_i_z, p_1, pairlistmA4_save, npairmA4, pairlisti,
2094 exclChecksum += npairi;
2112 #define EXCLUDED(X) X 2120 p_i_x, p_i_y, p_i_z, p_1, pairlistxA1_save, npairxA1, pairlisti,
2122 exclChecksum += npairi;
2134 p_i_x, p_i_y, p_i_z, p_1, pairlistxA2_save, npairxA2, pairlisti,
2136 exclChecksum += npairi;
2148 p_i_x, p_i_y, p_i_z, p_1, pairlistxA3_save, npairxA3, pairlisti,
2150 exclChecksum += npairi;
2162 p_i_x, p_i_y, p_i_z, p_1, pairlistxA4_save, npairxA4, pairlisti,
2164 exclChecksum += npairi;
2181 exclChecksum += npairxA1 + npairxA2 + npairxA3 + npairxA4;
2187 #define NOT_ALCHPAIR(X) X 2189 #endif // end nonbondedbase2.h includes for alchemical pairlists 2191 #ifdef NETWORK_PROGRESS 2192 CkNetworkProgress();
2197 __dcbt ((
void *) &(p_0[i+1]));
2204 FULL( fullf_net.
x += fullf_i_x; )
2205 FULL( fullf_net.
y += fullf_i_y; )
2206 FULL( fullf_net.
z += fullf_i_z; )
2210 FULL( fullf_0[i].x += fullf_i_x; )
2211 FULL( fullf_0[i].y += fullf_i_y; )
2212 FULL( fullf_0[i].z += fullf_i_z; )
2214 if ( savePairlists || ! usePairlists ) {
2231 #
if (
PAIR( 1+ ) 0 )
2256 #if ( FULL( 1+ ) 0 ) 2257 #if ( PAIR( 1+ ) 0 ) 2266 reduction[fullElectVirialIndex_XX] += fullElectVirial_xx;
2267 reduction[fullElectVirialIndex_XY] += fullElectVirial_xy;
2268 reduction[fullElectVirialIndex_XZ] += fullElectVirial_xz;
2269 reduction[fullElectVirialIndex_YX] += fullElectVirial_xy;
2270 reduction[fullElectVirialIndex_YY] += fullElectVirial_yy;
2271 reduction[fullElectVirialIndex_YZ] += fullElectVirial_yz;
2272 reduction[fullElectVirialIndex_ZX] += fullElectVirial_xz;
2273 reduction[fullElectVirialIndex_ZY] += fullElectVirial_yz;
2274 reduction[fullElectVirialIndex_ZZ] += fullElectVirial_zz;
2321 #ifndef MEM_OPT_VERSION
register BigReal virial_xy
void sortEntries_mergeSort_v1(SortEntry *&se, SortEntry *&buf, int seLen)
static int pressureProfileSlabs
register BigReal virial_xz
const TableEntry * table_row(unsigned int i) const
register BigReal virial_yz
void sortEntries_selectionSort(SortEntry *const se, const int seLen)
static BigReal dielectric_1
static void partition(int *order, const FullAtom *atoms, int begin, int end)
register BigReal electEnergy
static const Molecule * mol
int pairlist_from_pairlist(BigReal cutoff2, BigReal p_i_x, BigReal p_i_y, BigReal p_i_z, const CompAtom *p_j, const plint *list, int list_size, int *newlist, BigReal r2_delta, BigReal *r2list)
#define NBWORKARRAY(TYPE, NAME, SIZE)
static BigReal pressureProfileThickness
const int32 * get_full_exclusions_for_atom(int anum) const
BigReal GetAtomAlpha(int i) const
const int32 * get_mod_exclusions_for_atom(int anum) const
SimParameters * simParameters
const ExclusionCheck * get_excl_check_for_atom(int anum) const
Molecule stores the structural information for the system.
void sortEntries_bubbleSort(SortEntry *const se, const int seLen)
static int vdw_switch_mode
#define NBWORKARRAYSINIT(ARRAYS)
register BigReal virial_yy
void pp_clamp(int &n, int nslabs)
void nextlist(plint **list, int *list_size)
static Bool pairInteractionSelf
static BigReal * table_noshort
void NAMD_bug(const char *err_msg)
static BigReal pressureProfileMin
register BigReal virial_zz
const Real * get_qmAtomGroup() const
NbtholePairValue * nbthole_array
static Bool vdwForceSwitching
static BigReal * lambda_table
static BigReal * slow_table
#define REZERO_EXCL_FLAGS_BUFF
static Bool pairInteractionOn
static const LJTable * ljTable
register BigReal virial_xx
static BigReal LJewaldcof
void newsize(int list_size)
static BigReal * table_short
void setIndexValue(plint i)
#define COMPONENT_DOTPRODUCT(A, B)
static BigReal alchVdwShiftCoeff
NAMD_HOST_DEVICE BigReal rlength(void) const
BigReal * pressureProfileReduction
plint * newlist(int max_size)
NAMD_HOST_DEVICE Vector unit(void) const
void sortEntries_mergeSort_v2(SortEntry *&se, SortEntry *&buf, int seLen)
ComputeNonbondedWorkArrays * workArrays