NAMD
Sequencer.C
Go to the documentation of this file.
1 
7 /*****************************************************************************
8  * $Source: /home/cvs/namd/cvsroot/namd2/src/Sequencer.C,v $
9  * $Author: jim $
10  * $Date: 2016/08/26 19:40:32 $
11  * $Revision: 1.1230 $
12  *****************************************************************************/
13 
14 // The UPPER_BOUND macro is used to eliminate all of the per atom
15 // computation done for the numerical integration in Sequencer::integrate()
16 // other than the actual force computation and atom migration.
17 // The idea is to "turn off" the integration for doing performance
18 // profiling in order to get an upper bound on the speedup available
19 // by moving the integration parts to the GPU.
20 //
21 // Define it in the Make.config file, i.e. CXXOPTS += -DUPPER_BOUND
22 // or simply uncomment the line below.
23 //
24 //#define UPPER_BOUND
25 
26 //for gbis debugging; print net force on each atom
27 #include "CudaRecord.h"
28 #include "PatchData.h"
29 #include "common.h"
30 #define PRINT_FORCES 0
31 
32 #include "InfoStream.h"
33 #include "Node.h"
34 #include "SimParameters.h"
35 #include "Sequencer.h"
36 #include "HomePatch.h"
37 #include "ReductionMgr.h"
38 #include "CollectionMgr.h"
39 #include "BroadcastObject.h"
40 #include "Output.h"
41 #include "Controller.h"
42 #include "Broadcasts.h"
43 #include "Molecule.h"
44 #include "NamdOneTools.h"
45 #include "LdbCoordinator.h"
46 #include "Thread.h"
47 #include "Random.h"
48 #include "PatchMap.inl"
49 #include "ComputeMgr.h"
50 #include "ComputeGlobal.h"
51 #include "NamdEventsProfiling.h"
52 #include <iomanip>
53 #include "ComputeCUDAMgr.h"
54 #include "CollectionMaster.h"
55 #include "IMDOutput.h"
56 #include "CudaGlobalMasterServer.h"
57 
58 #include "TestArray.h"
59 
60 #include <algorithm> // Used for sorting
61 
62 #define MIN_DEBUG_LEVEL 3
63 //#define DEBUGM
64 //
65 // Define NL_DEBUG below to activate D_*() macros in integrate_SOA()
66 // for debugging.
67 //
68 //#define NL_DEBUG
69 #include "Debug.h"
70 
71 #if USE_HPM
72 #define START_HPM_STEP 1000
73 #define STOP_HPM_STEP 1500
74 #endif
75 
76 #include "DeviceCUDA.h"
77 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
78 #ifdef WIN32
79 #define __thread __declspec(thread)
80 #endif
81 extern __thread DeviceCUDA *deviceCUDA;
82 #ifdef __IBMCPP__
83 // IBM compiler requires separate definition for static members
85 #endif
86 #endif
87 
88 #define SPECIAL_PATCH_ID 91
89 
90 //
91 // BEGIN
92 // print_* routines
93 // assist in debugging SOA integration code
94 //
95 static void print_vel_AOS(
96  const FullAtom *a,
97  int ilo=0, int ihip1=1
98  ) {
99  printf("AOS Velocities:\n");
100  for (int i=ilo; i < ihip1; i++) {
101  printf("%d %g %g %g\n", i,
102  a[i].velocity.x, a[i].velocity.y, a[i].velocity.z);
103  }
104 }
105 
106 
// Debug helper: dump the SOA-layout velocities of atoms in the
// half-open index range [ilo, ihip1) to stdout, one atom per line.
static void print_vel_SOA(
    const double *vel_x,
    const double *vel_y,
    const double *vel_z,
    int ilo=0, int ihip1=1
    ) {
  printf("SOA Velocities:\n");
  int idx = ilo;
  while (idx < ihip1) {
    printf("%d %g %g %g\n", idx, vel_x[idx], vel_y[idx], vel_z[idx]);
    ++idx;
  }
}
118 
119 
120 static void print_tensor(const Tensor& t) {
121  printf("%g %g %g %g %g %g %g %g %g\n",
122  t.xx, t.xy, t.xz, t.yx, t.yy, t.yz, t.zx, t.zy, t.zz);
123 }
124 //
125 // END
126 // print_* routines
127 // assist in debugging SOA integration code
128 //
129 
130 
// Periodic step scheduler: after init(), check(step) fires (returns 1)
// exactly once each time the simulation reaches the next multiple of
// the configured period (optionally shifted by delta), advancing the
// schedule as it does so.
struct CheckStep {
  int period;    // interval, in steps, between firings
  int nextstep;  // the next step number at which check() will fire

  // Returns 1 exactly when step matches the scheduled step, and
  // advances the schedule by one period; otherwise returns 0.
  inline int check(int step) {
    if (step != nextstep) return 0;
    nextstep += period;
    return 1;
  }

  // Configure the schedule starting from initstep with the given period
  // and optional phase shift delta.  The first firing is the smallest
  // scheduled step strictly greater than initstep.
  // Returns true when initstep itself lands on the schedule
  // (i.e. initstep is divisible by period, accounting for delta).
  inline int init(int initstep, int initperiod, int delta=0) {
    period = initperiod;
    int candidate = initstep - (initstep % period) - (delta % period);
    while (candidate <= initstep) candidate += period;
    nextstep = candidate;
    return (nextstep == initstep + period);
  }

  CheckStep() : period(0), nextstep(0) { }
};
169 
170 
172  simParams(Node::Object()->simParameters),
173  patch(p),
174  collection(CollectionMgr::Object()),
175  ldbSteps(0),
176  pairlistsAreValid(0),
177  pairlistsAge(0),
178  pairlistsAgeLimit(0)
179 {
182 
183 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
185  CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
186  PatchData* patchData = cpdata.ckLocalBranch();
188  } else
189 #endif // defined(NAMD_CUDA) || defined(NAMD_HIP)
190  {
192  }
193 
198  int ntypes = simParams->pressureProfileAtomTypes;
199  int nslabs = simParams->pressureProfileSlabs;
202  REDUCTIONS_PPROF_INTERNAL, 3*nslabs*ntypes);
203  } else {
205  }
206  if (simParams->multigratorOn) {
208  } else {
209  multigratorReduction = NULL;
210  }
211  ldbCoordinator = (LdbCoordinator::Object());
214 
215  // Is soluteScaling enabled?
216  if (simParams->soluteScalingOn) {
217  // If so, we must "manually" perform charge scaling on startup because
218  // Sequencer will not get a scripting task for initial charge scaling.
219  // Subsequent rescalings will take place through a scripting task.
221  }
222 
224  stochRescale_count = 0;
226  masterThread = true;
227 // patch->write_tip4_props();
228 #if (defined(NAMD_CUDA) || defined(NAMD_HIP)) && defined(SEQUENCER_SOA) && defined(NODEGROUP_FORCE_REGISTER)
230 #if 0
231  CUDASequencer = new SequencerCUDA(deviceCUDA->getDeviceID(),
232  simParams);
233 #else
234  CUDASequencer = SequencerCUDA::InstanceInit(deviceCUDA->getDeviceID(),
235  simParams);
236 
237  syncColl = SynchronousCollectives::Object();
238  globalGPUMgr = GlobalGPUMgr::Object();
240 #endif
241  }
242 #endif
243 }
244 
246 {
247  delete broadcast;
248  delete reduction;
249  delete min_reduction;
251  delete random;
253 #if (defined(NAMD_CUDA) || defined(NAMD_HIP)) && defined(SEQUENCER_SOA) && defined(NODEGROUP_FORCE_REGISTER)
255  delete CUDASequencer;
257  }
258 #endif
259 }
260 
261 // Invoked by thread
262 void Sequencer::threadRun(Sequencer* arg)
263 {
265  arg->algorithm();
266 }
267 
268 // Invoked by Node::run() via HomePatch::runSequencer()
269 void Sequencer::run(void)
270 {
271  // create a Thread and invoke it
272  DebugM(4, "::run() - this = " << this << "\n" );
273  thread = CthCreate((CthVoidFn)&(threadRun),(void*)(this),SEQ_STK_SZ);
274  CthSetStrategyDefault(thread);
275  priority = PATCH_PRIORITY(patch->getPatchID());
276  awaken();
277 }
278 
280 {
282  CthSuspend();
284 }
285 
286 // Defines sequence of operations on a patch. e.g. when
287 // to push out information for Compute objects to consume
288 // when to migrate atoms, when to add forces to velocity update.
290 {
291  int scriptTask;
292  int scriptSeq = 0;
293  // Blocking receive for the script barrier.
294  while ( (scriptTask = broadcast->scriptBarrier.get(scriptSeq++)) != SCRIPT_END ) {
295  switch ( scriptTask ) {
296  case SCRIPT_OUTPUT:
298  break;
299  case SCRIPT_FORCEOUTPUT:
301  break;
302  case SCRIPT_MEASURE:
304  break;
305  case SCRIPT_REINITVELS:
307  break;
308  case SCRIPT_RESCALEVELS:
310  break;
313  break;
315  reloadCharges();
316  break;
317  case SCRIPT_CHECKPOINT:
318  patch->checkpoint();
320  break;
321  case SCRIPT_REVERT:
322  patch->revert();
324  pairlistsAreValid = 0;
325  break;
331  break;
332  case SCRIPT_ATOMSENDRECV:
333  case SCRIPT_ATOMSEND:
334  case SCRIPT_ATOMRECV:
335  patch->exchangeAtoms(scriptTask);
336  break;
337  case SCRIPT_MINIMIZE:
338 #if 0
340  NAMD_die("Minimization is currently not supported on the GPU integrator\n");
341  }
342 #endif
343  minimize();
344  break;
345  case SCRIPT_RUN:
346  case SCRIPT_CONTINUE:
347  //
348  // DJH: Call a cleaned up version of integrate().
349  //
350  // We could test for simulation options and call a more basic version
351  // of integrate() where we can avoid performing most tests.
352  //
353 #ifdef SEQUENCER_SOA
354  if ( simParams->SOAintegrateOn ) {
355 #ifdef NODEGROUP_FORCE_REGISTER
356 
358  else {
359 #endif
360  integrate_SOA(scriptTask);
361 #ifdef NODEGROUP_FORCE_REGISTER
362  }
363 #endif
364  }
365  else
366 #endif
367  integrate(scriptTask);
368  break;
369  default:
370  NAMD_bug("Unknown task in Sequencer::algorithm");
371  }
372  }
374  terminate();
375 }
376 
377 
378 #ifdef SEQUENCER_SOA
379 
381 //
382 // begin SOA code
383 //
384 
385 #if defined(NODEGROUP_FORCE_REGISTER)
386 
388  PatchMap* patchMap = PatchMap::Object();
389  CUDASequencer->numPatchesCheckedIn += 1;
390  if (CUDASequencer->numPatchesCheckedIn < patchMap->numPatchesOnNode(CkMyPe())) {
391  masterThread = false;
392  CUDASequencer->waitingThreads.push_back(CthSelf());
393  NAMD_EVENT_STOP(patch->flags.event_on, NamdProfileEvent::INTEGRATE_SOA_1);
394  CthSuspend();
395 
396  // JM: if a thread get here, it will be for migrating atoms until the end of the simulation
397  while(true){
398  // read global flags
399  int lastStep = CUDASequencer->patchData->flags.step;
400  int startup = (CUDASequencer->patchData->flags.step == simParams->firstTimestep);
401  if (CUDASequencer->breakSuspends) break;
403  this->patch->positionsReady_GPU(true, startup);
404  } else {
405  this->patch->positionsReady_SOA(true);
406  }
407  CUDASequencer->numPatchesCheckedIn += 1;
408  CUDASequencer->waitingThreads.push_back(CthSelf());
409  if(CUDASequencer->numPatchesCheckedIn == patchMap->numPatchesOnNode(CkMyPe()) - 1 &&
410  CUDASequencer->masterThreadSleeping){
411  CUDASequencer->masterThreadSleeping = false;
412  CthAwaken(CUDASequencer->masterThread);
413  }
414  CthSuspend();
415  }
416  }
417 }
418 void Sequencer::wakeULTs(){
419  CUDASequencer->numPatchesCheckedIn = 0;
420  for (CthThread t : CUDASequencer->waitingThreads) {
421  CthAwaken(t);
422  }
423  CUDASequencer->waitingThreads.clear();
424 }
425 
426 void Sequencer::runComputeObjectsCUDA(int doMigration, int doGlobal, int pairlists, int nstep, int startup) {
427 
428  PatchMap* map = PatchMap::Object();
429 
430  bool isMaster = deviceCUDA->getMasterPe() == CkMyPe();
432 
433  // Sync after the node barrier. This is making sure that the position buffers have been
434  // populated. However, this doesn't need to happen at the node level. I.e. the non-pme
435  // nonbonded calculations can begin before the PME device is finished setting it's positions.
436  // There is a node barrier after the forces are done, so we don't have to worry about
437  // the positions being updated before the positions have been set
438  if (isMaster) {
439  CUDASequencer->sync();
440  }
441 
442 
443  // JM: Each masterPE owns a particular copy of the compute object we need to launch
444  // work on. The goal is to launch work on everyone, but for migration steps, sometimes
445  // there are a few operation that need to be launched on computes owned by different PEs.
446  // ComputeBondedCUDA::openBoxesOnPe() is an example: There is a list of PEs on each compute
447  // which holds information on which proxy object it should also invoke openBoxesOnPe();
448 
449  // We need to be mindful of that and, since we want to launch methods on different computes.
450  // A data structure that holds all nonbonded Computes from all masterPEs is necessary
452  ComputeBondedCUDA* cudaBond = cudaMgr->getComputeBondedCUDA();
453  CudaComputeNonbonded* cudaNbond = cudaMgr->getCudaComputeNonbonded();
454  CudaPmeOneDevice* cudaPme = (globalGPUMgr->getIsPmeDevice() && simParams->fullElectFrequency) ?
455  cudaMgr->getCudaPmeOneDevice() : NULL;
456  // We need to submit PME reductions even if we don't compute the pme force
457  int computePme = (patch->flags.doFullElectrostatics);
458  int reducePme = (patch->flags.doVirial || patch->flags.doEnergy);
459  auto cudaGlobal = deviceCUDA->getIsGlobalDevice() ? cudaMgr->getCudaGlobalMaster() : nullptr;
460  if (isMaster && cudaGlobal && doMigration) cudaGlobal->setStep(static_cast<int64_t>(patch->flags.step));
461  // fprintf(stderr, "Patch %d invoking computes\n", this->patch->patchID);
462 
463 
464  // JM NOTE: I don't think the scheme below holds for nMasterPes > 1, check it out laters
465 
466  // Invoking computes on the GPU //
467  if(doMigration){
468  // JM: if we're on a migration step, we call the setup functions manually
469  // which means:
470  // 0. masterPe->doWork();
471  // 1. openBoxesOnPe();
472  // loadTuplesOnPe();
473  // 2. masterPe->launchWork();
474  // 3. finishPatchesOnPe();
475  // 4. masterPe->finishReductions();
476 
477  if(isMaster){
478  NAMD_EVENT_START(1, NamdProfileEvent::MIG_ATOMUPDATE);
479  cudaNbond->atomUpdate();
480  cudaBond->atomUpdate();
481  cudaNbond->doWork();
482  cudaBond->doWork();
483  NAMD_EVENT_STOP(1, NamdProfileEvent::MIG_ATOMUPDATE);
484 
485  if (cudaPme && !simParams->useDeviceMigration) CUDASequencer->atomUpdatePme();
486  if (cudaGlobal) {
487  cudaGlobal->updateAtomMaps();
488  cudaGlobal->communicateToClients(&(this->patch->lattice));
489  }
490  }
491 
493 
495  if(isMaster){
496  CUDASequencer->launch_set_compute_positions();
497  CUDASequencer->sync(); // TODO move this to tuple migration
498  }
500  }
501 
502  NAMD_EVENT_START(1, NamdProfileEvent::MIG_OPENBOXESONPE);
503 
504  // Here we need to do the following, for each Comput
505  for(int i = 0 ; i < CkNumPes(); i++){
506  // Here I need to find if the PE is on the bonded PE list
507  // XXX NOTE: This might be inefficient. Check the overhead later
508  ComputeBondedCUDA* b = CUDASequencer->patchData->cudaBondedList[i];
509  CudaComputeNonbonded* nb = CUDASequencer->patchData->cudaNonbondedList[i];
510  if (b == NULL) continue;
511  auto list = std::find(std::begin(b->getBondedPes()), std::end(b->getBondedPes()), CkMyPe());
512  if( list != std::end(b->getBondedPes()) ){
513  b->openBoxesOnPe(startup);
514 
 515  // XXX NOTE: nb has a different PE list!!! We need a different loop for nb
516  nb->openBoxesOnPe();
517 
518  }
520  }
521  NAMD_EVENT_STOP(1, NamdProfileEvent::MIG_OPENBOXESONPE);
522  // for the bonded kernels, there's an additional step here, loadTuplesOnPe
523  // JM NOTE: Those are major hotspots, they account for 50% of the migration time.
525  NAMD_EVENT_START(1, NamdProfileEvent::MIG_LOADTUPLESONPE);
526 
527  // NOTE: problem here: One of the CompAtomExt structures is turning to null, why?
528  cudaBond->loadTuplesOnPe(startup);
529  NAMD_EVENT_STOP(1, NamdProfileEvent::MIG_LOADTUPLESONPE);
531  NAMD_EVENT_START(1, NamdProfileEvent::MIG_COPYTUPLEDATA);
532 
534  cudaBond->copyTupleDataGPU(startup);
535  } else {
536  cudaBond->copyTupleDataSN();
537  }
538 
539  NAMD_EVENT_STOP(1, NamdProfileEvent::MIG_COPYTUPLEDATA);
540  // waits until everyone has finished to open their respective boxes
541  // node barrier actually prevents the error that is happening.
543  if(isMaster){
544  // launches work on the masterPe
545  NAMD_EVENT_START(1, NamdProfileEvent::MIG_LAUNCHWORK);
546  cudaBond->launchWork();
547  cudaNbond->launchWork();
548  if (cudaPme && computePme) {
549  cudaPme->compute(*(CUDASequencer->patchData->lat), reducePme, this->patch->flags.step);
550  }
551  cudaNbond->reSortTileLists();
552  if (cudaGlobal) {
553  // cudaGlobal->communicateToClients(&(this->patch->lattice));
554  cudaGlobal->calculate();
555  cudaGlobal->communicateToMD(patch->flags.doEnergy, patch->flags.doVirial);
556  }
557  NAMD_EVENT_STOP(1, NamdProfileEvent::MIG_LAUNCHWORK);
558  }
559 
561  //global master force calculation
562 
563  if(doGlobal) {
564  NAMD_EVENT_START(1, NamdProfileEvent::GM_CALCULATE);
566  // Zero all SOA global forces before computing next global force
567  NAMD_EVENT_START(1, NamdProfileEvent::GM_ZERO);
568  int numhp = PatchMap::Object()->numHomePatches();
570  for(int i = 0; i < numhp; ++i) {
571  HomePatch *hp = hpList->item(i).patch;
572  hp->zero_global_forces_SOA();
573  }
574  NAMD_EVENT_STOP(1, NamdProfileEvent::GM_ZERO);
575  NAMD_EVENT_START(1, NamdProfileEvent::GM_DOWORK);
576  // call globalmaster to calculate the force from client.
577  computeGlobal->doWork();
578  NAMD_EVENT_STOP(1, NamdProfileEvent::GM_DOWORK);
579  NAMD_EVENT_START(1, NamdProfileEvent::GM_BARRIER);
581  // CkPrintf("post doWork step %d \n",this->patch->flags.step);
582  // CUDASequencer->printSOAPositionsAndVelocities();
583  NAMD_EVENT_STOP(1, NamdProfileEvent::GM_BARRIER);
584  if(isMaster) {
585  // aggregate and copy the global forces to d_f_global device buffer
586  NAMD_EVENT_START(1, NamdProfileEvent::GM_CPY_FORCE);
587  CUDASequencer->copyGlobalForcesToDevice();
588  NAMD_EVENT_STOP(1, NamdProfileEvent::GM_CPY_FORCE);
589  }
590  NAMD_EVENT_STOP(1, NamdProfileEvent::GM_CALCULATE);
591  }
592  NAMD_EVENT_START(1, NamdProfileEvent::MIG_FINISHPATCHES);
593  cudaNbond->finishPatches();
594  cudaBond->finishPatches();
595  NAMD_EVENT_STOP(1, NamdProfileEvent::MIG_FINISHPATCHES);
597 
598  // finishes reduction with masterPe!
599  if(isMaster){
600  cudaNbond->finishReductions();
601  if (cudaPme) cudaPme->finishReduction(reducePme);
602  cudaBond->finishReductions();
603  if (cudaGlobal) cudaGlobal->finishReductions();
604  }
606  }
607  // if we're not on a migration step, do the work only on masterPE, except globalmaster work
608  else {
609  int doNbond = patch->flags.doNonbonded;
610  if(isMaster) {
611  // JM NOTE: We issue the nonbonded work first and sync it last
612  if (cudaPme && computePme) {
613  cudaPme->compute(*(CUDASequencer->patchData->lat), reducePme, this->patch->flags.step);
614  }
615  cudaNbond->doWork();
616  cudaBond->doWork();
617  if (cudaGlobal) {
618  // cudaGlobal->communicateToClients(&(this->patch->lattice));
619  cudaGlobal->calculate();
620  cudaGlobal->communicateToMD(patch->flags.doEnergy, patch->flags.doVirial);
621  }
622  }
623  //global master force calculation
624  if(doGlobal) {
625  NAMD_EVENT_START(1, NamdProfileEvent::GM_CALCULATE);
626  NAMD_EVENT_START(1, NamdProfileEvent::GM_ZERO);
628  // Zero all SOA global forces before computing next global force
629  int numhp = PatchMap::Object()->numHomePatches();
631  for(int i = 0; i < numhp; ++i) {
632  HomePatch *hp = hpList->item(i).patch;
633  hp->zero_global_forces_SOA();
634  }
635  NAMD_EVENT_STOP(1, NamdProfileEvent::GM_ZERO);
636  // call globalmaster to calculate the force from client.
637  NAMD_EVENT_START(1, NamdProfileEvent::GM_DOWORK);
638  computeGlobal->doWork();
639  NAMD_EVENT_STOP(1, NamdProfileEvent::GM_DOWORK);
640  NAMD_EVENT_START(1, NamdProfileEvent::GM_BARRIER);
642  NAMD_EVENT_STOP(1, NamdProfileEvent::GM_BARRIER);
643  // CkPrintf("post doWork 2 step %d \n",this->patch->flags.step);
644  // CUDASequencer->printSOAPositionsAndVelocities();
645  if(isMaster) {
646  // aggregate and copy the global forces to d_f_global device buffer
647  NAMD_EVENT_START(1, NamdProfileEvent::GM_CPY_FORCE);
648  CUDASequencer->copyGlobalForcesToDevice();
649  NAMD_EVENT_STOP(1, NamdProfileEvent::GM_CPY_FORCE);
650  }
651  NAMD_EVENT_STOP(1, NamdProfileEvent::GM_CALCULATE);
652  }
653  if(isMaster) {
654  cudaBond->finishPatches();
655  if (cudaPme) {
656  cudaPme->finishReduction(reducePme);
657  }
658  cudaNbond->finishPatches();
659  if (cudaGlobal) cudaGlobal->finishReductions();
660  }
661  }
662 
663 #if 0
664  // for migrations, I need to call OpenBoxesOnPe and finishPatches for every Pe
666  pairlistsAreValid = 1;
667  pairlistsAge = 0;
668  }
669  if ( pairlistsAreValid /* && !pressureStep */ ) ++pairlistsAge;
670 #endif
671  // syncColl->barrier(SynchronousCollectiveScope::all);
672 }
673 
674 //apply MC pressure control
676  const int step,
677  const int doMigration,
678  const int doEnergy,
679  const int doVirial,
680  const int maxForceNumber,
681  const int doGlobal)
682 {
683  bool isMasterPe = (deviceCUDA->getMasterPe() == CkMyPe() );
684  NodeReduction *reduction = CUDASequencer->patchData->reductionBackend;
685  Controller *c_out = CUDASequencer->patchData->c_out;
686  bool mGpuOn = CUDASequencer->mGpuOn;
687  Lattice oldLattice = this->patch->lattice;
688  Vector origin = this->patch->lattice.origin();
689  Tensor factor;
690  int accepted = 0; // status of MC volume fluctuation trial
691 
692  CUDASequencer->submitReductionValues(); // Copy data to NodeReduction
693  if(isMasterPe){
694  // Backup the reduction values for rejected move
695  CUDASequencer->patchData->reductionBackendSave->setVal(reduction);
696 
698  // Send the rescale factor for Monte Carlo Volume change from controller
699  c_out->mcPressure_prepare(step);
700  // receive the factor
701  factor = broadcast->positionRescaleFactor.get(step, CkNumPes());
702  }
703 
704  // Backup positions and forces, scale the coordinates and lattice
705  // Setup positions for energy and force calculation
706  CUDASequencer->monteCarloPressure_part1(factor, origin, oldLattice);
708  // Scale the lattice with factor
709  // patch.lattice is pointing to patch.flags.lattice
710  this->patch->lattice.rescale(factor);
711  CUDASequencer->patchData->lat = &(this->patch->lattice);
712  CUDASequencer->patchData->factor = &(factor);
 714  // Copy scaled lattice flags to all patches
714  CUDASequencer->patchData->flags.copyIntFlags(this->patch->flags);
715 
716  // Zero all reduction values. We will add halfStep values, if
717  // the move is accepted.
718  reduction->zero();
719  }
720  }
721 
723  if(isMasterPe){
724  // copy global flags
725  CUDASequencer->update_patch_flags();
726  }
728  // Calculate the new force and energy after rescaling the coordinates
729  // Migration happend before calling this function
730  this->runComputeObjectsCUDA(0, doGlobal, 1, step, 0 /* startup */);
732 
733  if(isMasterPe){
734  // Accumulate force to SOA, calculate External energy/force
735  // reduce energy and virial
736  CUDASequencer->monteCarloPressure_part2(step, maxForceNumber,
737  doEnergy, doGlobal, doVirial);
738  CUDASequencer->submitReductionValues(); // Copy data to NodeReduction
739 
741  // Check to see if the move is accepted or not
742  c_out->mcPressure_accept(step);
743  accepted = broadcast->monteCarloBarostatAcceptance.get(step);
744  //printf("Sequencer (accept): step: %d, Pe: %d, ACC status: %d\n", step, CkMyPe(), accepted);
745  }
746 
747  if (accepted) { // Move accepted
748  CUDASequencer->monteCarloPressure_accept(doMigration);
749  } else { // Move rejected
751  // Set the lattice to the original value, before scaling
752  this->patch->lattice = oldLattice;
753  CUDASequencer->patchData->lat = &(this->patch->lattice);
 754  // Copy scaled lattice flags to all patches
755  CUDASequencer->patchData->flags.copyIntFlags(this->patch->flags);
756  }
757 
758  // Restore all positions and forces and cuLattice
759  CUDASequencer->monteCarloPressure_reject(this->patch->lattice);
760  // Restore the reduction values
761 
762  reduction->setVal(CUDASequencer->patchData->reductionBackendSave);
763  }
764  }
765 
767  //continue the rejection step. Need to update lattice in all patches
768  if(isMasterPe && !accepted){
769  // copy global flags
770  CUDASequencer->update_patch_flags();
771  }
772 }
773 
774 void Sequencer::doMigrationGPU(const int startup, const int doGlobal,
775  const int updatePatchMap) {
776 
777  const bool isMasterPe = deviceCUDA->getMasterPe() == CkMyPe();
778  const bool updatePatchData = startup || doGlobal || updatePatchMap;
779  PatchMap* patchMap = PatchMap::Object();
780 
781  bool realloc = false;
782 
783  // This will check if a reallocation was done on the previous migration
784  // We use the scratch buffers to store the atomic data during reallocation
 785  // However, the migrationDestination data must be maintained throughout
786  // migration (and tuple migration so beyond the scope of this function)
787  // We probably should add a function to do this at the end of migration
 788  // But for now, DMC thought it was easier to just do at the beginning
789  for (int i = 0; i < deviceCUDA->getNumDevice(); i++) {
790  if(CUDASequencer->patchData->atomReallocationFlagPerDevice[i] != 0) {
791  realloc = true;
792  break;
793  }
794  }
795  if (realloc) {
796  if (isMasterPe) {
797  CUDASequencer->reallocateMigrationDestination();
798  CUDASequencer->registerSOAPointersToHost();
799  }
801  if (isMasterPe) {
802  CUDASequencer->copySOAHostRegisterToDevice();
803  }
804  }
805 
806  // Proceed with migration
807  //
808  // Starts GPU migration
809  //
810  if (isMasterPe) {
811  CUDASequencer->migrationLocalInit();
812  // Hidden stream sync
813  }
815 
816  if (isMasterPe) {
817  CUDASequencer->migrationPerform();
818  // Hidden stream sync
819  }
821 
822  if (isMasterPe) {
823  CUDASequencer->migrationUpdateAtomCounts();
824  // Hidden stream sync
825  }
827 
828  if (isMasterPe) {
829  CUDASequencer->migrationUpdateAtomOffsets();
830  // Hidden stream sync
831  }
833 
834  if (isMasterPe) {
835  CUDASequencer->copyPatchDataToHost();
836  // Hidden stream sync
837  }
839 
840  // Update device buffer allocations
841  realloc = false;
842  if (isMasterPe) {
843  realloc = CUDASequencer->copyPatchData(true, false);
844  CUDASequencer->patchData->atomReallocationFlagPerDevice[deviceCUDA->getDeviceIndex()] = realloc;
845  }
847 
848  // If any of the devices have reallocated, we need to re-register the p2p buffers
849  for (int i = 0; i < deviceCUDA->getNumDevice(); i++) {
850  if(CUDASequencer->patchData->atomReallocationFlagPerDevice[i] != 0) {
851  realloc = true;
852  break;
853  }
854  }
855  if (realloc) {
856  if (isMasterPe) {
857  CUDASequencer->registerSOAPointersToHost();
858  }
860  if (isMasterPe) {
861  CUDASequencer->copySOAHostRegisterToDevice();
862  }
863  }
864 
865  // Performs various post processing like Solute/Solvent sorting and copies back to host
866  if (isMasterPe) {
867  CUDASequencer->migrationLocalPost(0);
868  CUDASequencer->migrationSortAtomsNonbonded();
869  }
870 
871  // If this is startup, we need to delay this until after AoS has been copied back to host
872  // Because we do need the atomIDs for the atom map initially
873  if (!updatePatchData) {
874  wakeULTs(); // Wakes everyone back up for migration
875  this->patch->positionsReady_GPU(1, startup);
876  if(CUDASequencer->numPatchesCheckedIn < patchMap->numPatchesOnNode(CkMyPe()) -1 ) {
877  CUDASequencer->masterThreadSleeping = true;
878  CUDASequencer->masterThread = CthSelf();
879  CthSuspend();
880  }
881  }
882 
883  if (isMasterPe) {
884  CUDASequencer->sync();
885  }
887 
888  if (isMasterPe) {
889  CUDASequencer->migrationUpdateDestination();
890  }
892 
893  if (isMasterPe) {
894  CUDASequencer->migrationUpdateProxyDestination();
895  }
897 
898  if (isMasterPe) {
899  CUDASequencer->migrationUpdateRemoteOffsets();
900  }
902 
903  if (isMasterPe) {
904  CUDASequencer->copyDataToPeers(true);
905  }
907 
908  if (updatePatchData) {
 909  // The atom maps need to be cleared after the HomePatch atom arrays have been updated
910  int numhp = PatchMap::Object()->numHomePatches();
912  for(int i = 0; i < numhp; ++i) {
913  HomePatch *hp = hpList->item(i).patch;
914  hp->clearAtomMap();
915  }
917  if (isMasterPe) {
918  // We need the atom ordering to be correct within each
919  // patch to setup the atom map. The vdwType of each atom
920  // is also used for exclusion tuple generation
921  CUDASequencer->copyAoSDataToHost();
922  }
924  wakeULTs(); // Wakes everyone back up for migration
925  this->patch->positionsReady_GPU(1, startup);
926  if(CUDASequencer->numPatchesCheckedIn < patchMap->numPatchesOnNode(CkMyPe()) -1 ) {
927  CUDASequencer->masterThreadSleeping = true;
928  CUDASequencer->masterThread = CthSelf();
929  CthSuspend();
930  }
932  }
933  if (isMasterPe) {
934  if (doGlobal || simParams->forceDcdFrequency > 0) {
935  CUDASequencer->updateHostPatchDataSOA(); // Needs to be called after HomePatch updates
936  }
937  }
939  if (isMasterPe) {
 940  // This needs to be called after positionsReady_GPU so that the atom maps have been updated
941  // This will be called in updateDeviceData during with startup=true, but we need to call it
942  // with startup=false to make sure the atoms are updated
943  CUDASequencer->migrationUpdateAdvancedFeatures(false);
944  }
946 }
947 
948 // JM: Single-node integration scheme
949 void Sequencer::integrate_CUDA_SOA(int scriptTask){
950 
951  #ifdef TIMER_COLLECTION
952  TimerSet& t = patch->timerSet;
953  #endif
954  TIMER_INIT_WIDTH(t, KICK, simParams->timerBinWidth);
955  TIMER_INIT_WIDTH(t, MAXMOVE, simParams->timerBinWidth);
956  TIMER_INIT_WIDTH(t, DRIFT, simParams->timerBinWidth);
957  TIMER_INIT_WIDTH(t, PISTON, simParams->timerBinWidth);
958  TIMER_INIT_WIDTH(t, SUBMITHALF, simParams->timerBinWidth);
959  TIMER_INIT_WIDTH(t, VELBBK1, simParams->timerBinWidth);
960  TIMER_INIT_WIDTH(t, VELBBK2, simParams->timerBinWidth);
961  TIMER_INIT_WIDTH(t, RATTLE1, simParams->timerBinWidth);
962  TIMER_INIT_WIDTH(t, SUBMITFULL, simParams->timerBinWidth);
963  TIMER_INIT_WIDTH(t, SUBMITCOLLECT, simParams->timerBinWidth);
964 
965  // Keep track of the step number.
966  //int &step = patch->flags.step;
967  int &step = patch->flags.step;
968  step = simParams->firstTimestep;
969  Controller *c_out = CUDASequencer->patchData->c_out;
970  PatchMap* patchMap = PatchMap::Object();
971 
972  // For multiple time stepping, which force boxes are used?
973  int &maxForceUsed = patch->flags.maxForceUsed;
974  int &maxForceMerged = patch->flags.maxForceMerged;
975  maxForceUsed = Results::normal;
976  maxForceMerged = Results::normal;
977 
978  // Keep track of total steps and steps per cycle.
979  const int numberOfSteps = simParams->N;
980  //const int stepsPerCycle = simParams->stepsPerCycle;
981  CheckStep stepsPerCycle;
982  stepsPerCycle.init(step, simParams->stepsPerCycle);
983  // The fundamental time step, get the scaling right for velocity units.
984  const BigReal timestep = simParams->dt * RECIP_TIMEFACTOR;
985 
986  //const int nonbondedFrequency = simParams->nonbondedFrequency;
987  //slowFreq = nonbondedFrequency;
988  CheckStep nonbondedFrequency;
990  // The step size for short-range nonbonded forces.
991  const BigReal nbondstep = timestep * simParams->nonbondedFrequency;
992  int &doNonbonded = patch->flags.doNonbonded;
993  //doNonbonded = (step >= numberOfSteps) || !(step%nonbondedFrequency);
994  doNonbonded = (step >= numberOfSteps) ||
995  nonbondedFrequency.init(step, simParams->nonbondedFrequency);
996  //if ( nonbondedFrequency == 1 ) maxForceMerged = Results::nbond;
997  if ( nonbondedFrequency.period == 1 ) maxForceMerged = Results::nbond;
998  if ( doNonbonded ) maxForceUsed = Results::nbond;
999 
1000  // Do we do full electrostatics?
1001  const int dofull = ( simParams->fullElectFrequency ? 1 : 0 );
1002  //const int fullElectFrequency = simParams->fullElectFrequency;
1003  //if ( dofull ) slowFreq = fullElectFrequency;
1004  CheckStep fullElectFrequency;
1005  if ( dofull ) slowFreq = simParams->fullElectFrequency;
1006  // The step size for long-range electrostatics.
1007  const BigReal slowstep = timestep * simParams->fullElectFrequency;
1008  int &doFullElectrostatics = patch->flags.doFullElectrostatics;
1009  //doFullElectrostatics = (dofull &&
1010  // ((step >= numberOfSteps) || !(step%fullElectFrequency)));
1011  doFullElectrostatics = (dofull &&
1012  ((step >= numberOfSteps) ||
1013  fullElectFrequency.init(step, simParams->fullElectFrequency)));
1014  //if ( dofull && fullElectFrequency == 1 ) maxForceMerged = Results::slow;
1015  if ( dofull && fullElectFrequency.period == 1 ) maxForceMerged = Results::slow;
1016  if ( doFullElectrostatics ) maxForceUsed = Results::slow;
1017 
1018  // Bother to calculate energies?
1019  int &doEnergy = patch->flags.doEnergy;
1020  //int energyFrequency = simParams->outputEnergies;
1021  CheckStep energyFrequency;
1022  int newComputeEnergies = simParams->computeEnergies;
1023  if(simParams->alchOn) newComputeEnergies = NAMD_gcd(newComputeEnergies, simParams->alchOutFreq);
1024  doEnergy = energyFrequency.init(step, newComputeEnergies);
1025 
1026  // check for Monte Carlo pressure control.
1027  CheckStep monteCarloPressureFrequency;
1028  doEnergy += monteCarloPressureFrequency.init(step, (simParams->monteCarloPressureOn ?
1029  simParams->monteCarloPressureFreq : numberOfSteps + 1) );
1030 
1031  int &doVirial = patch->flags.doVirial;
1032  doVirial = 1;
1033  // Do we need to return forces to TCL script or Colvar module?
1034  int doTcl = simParams->tclForcesOn;
1035  int doColvars = simParams->colvarsOn;
1036  const int doIMD = (simParams->IMDon && ! (simParams->IMDignore || simParams->IMDignoreForces));
1037  int doGlobal = (doTcl || doColvars || doIMD);
1039  CheckStep globalMasterFrequency;
1040  bool globalMasterStep=false;
1041  int doGlobalObjects=0;
1042  int doGlobalStaleForces = 0;
1043 
1044  if(doGlobal)
1045  {
1046  globalMasterFrequency.init(step, (simParams->globalMasterFrequency > 0 ? simParams->globalMasterFrequency : numberOfSteps+1));
1047  globalMasterStep = globalMasterFrequency.check(step);
1048  doGlobalObjects = globalMasterStep? 1:0;
1050  {
1051  doGlobalObjects=1;
1052  doGlobalStaleForces=1;
1053  }
1055  {
1056  doGlobalStaleForces = simParams->globalMasterStaleForces;
1057  }
1059  {
1060  doGlobalStaleForces=doGlobalObjects;
1061  }
1062  else
1063  {
1064  doGlobalStaleForces=doGlobalObjects;
1065  }
1066  }
1067  else
1068  {
1069  doGlobalStaleForces = 0;
1070  doGlobalObjects = 0;
1071  }
1072  // The following flags have to be explicitly disabled in Patch object.
1073  patch->flags.doMolly = 0;
1074  patch->flags.doLoweAndersen = 0;
1075  patch->flags.doGBIS = 0;
1076  patch->flags.doLCPO = 0;
1077 
1078  // Square of maximum velocity for simulation safety check
1079  const BigReal maxvel2 =
1080  (simParams->cutoff * simParams->cutoff) / (timestep * timestep);
1081 
1082  // check for Langevin piston
1083  // set period beyond numberOfSteps to disable
1084  // fprintf(stderr, " Patch %d Pinging in from integrate_cuda!\n", this->patch->getPatchID());
1085  CheckStep langevinPistonFrequency;
1086  langevinPistonFrequency.init(step,
1087  (simParams->langevinPistonOn ? slowFreq : numberOfSteps+1 ),
1088  (simParams->langevinPistonOn ? -1-slowFreq/2 : 0) /* = delta */);
1089 
1090  // check for velocity rescaling
1091  // set period beyond numberOfSteps to disable
1092  CheckStep stochRescaleFrequency;
1093  stochRescaleFrequency.init(step, (simParams->stochRescaleOn ?
1094  simParams->stochRescaleFreq : numberOfSteps+1 ) );
1095 
1096  CheckStep reassignVelocityFrequency;
1097  reassignVelocityFrequency.init(step, ((simParams->reassignFreq>0) ?
1098  simParams->reassignFreq : numberOfSteps+1 ) );
1099 
1100  // check for output
1101  // set period beyond numberOfSteps to disable
1102  CheckStep restartFrequency;
1103  restartFrequency.init(step, (simParams->restartFrequency > 0 ?
1104  simParams->restartFrequency : numberOfSteps+1) );
1105  CheckStep dcdFrequency;
1106  dcdFrequency.init(step, (simParams->dcdFrequency > 0 ?
1107  simParams->dcdFrequency : numberOfSteps+1) );
1108  CheckStep velDcdFrequency;
1109  velDcdFrequency.init(step, (simParams->velDcdFrequency > 0 ?
1110  simParams->velDcdFrequency : numberOfSteps+1) );
1111  CheckStep forceDcdFrequency;
1112  forceDcdFrequency.init(step, (simParams->forceDcdFrequency > 0 ?
1113  simParams->forceDcdFrequency : numberOfSteps+1) );
1114  CheckStep imdFrequency;
1115  imdFrequency.init(step, (simParams->IMDon ?
1116  simParams->IMDfreq : numberOfSteps+1) );
1117 
1118  patch->copy_atoms_to_SOA(); // do this whether or not useDeviceMigration
1119 
1120  // Haochuan: is this really needed for GPU-resident?
1121  if (simParams->rigidBonds != RIGID_NONE && ! patch->settle_initialized) {
1123  patch->rattleListValid_SOA = true;
1124  }
1125 
1126  this->suspendULTs();
1127  // for "run 0", numberOfSteps is zero, but we want to have at least a single energy evaluation
1128  if(!masterThread) {
1129  return;
1130  }
1131  bool isMasterPe = (deviceCUDA->getMasterPe() == CkMyPe() );
1133 
1134  CUDASequencer->breakSuspends = false;
1135 
1136  // XXX this is ugly!
1137  // one thread will have the CollectionMaster and Output defined
1138  // use it to set the node group so that any thread can access
1139  if (CUDASequencer->patchData->ptrCollectionMaster == NULL) {
1140  CollectionMaster *pcm = CkpvAccess(CollectionMaster_instance)->Object();
1141  if (pcm) {
1142  CUDASequencer->patchData->ptrCollectionMaster = pcm;
1143  }
1144  }
1145  if (CUDASequencer->patchData->ptrOutput == NULL) {
1146  Output *pout = Node::Object()->output;
1147  if (pout) {
1148  CUDASequencer->patchData->ptrOutput = pout;
1149  }
1150  }
1151  if (CUDASequencer->patchData->pdb == NULL) {
1152  PDB *pdb = Node::Object()->pdb;
1153  if (pdb) {
1154  CUDASequencer->patchData->pdb = pdb;
1155  }
1156  }
1157  if (CUDASequencer->patchData->imd == NULL) {
1158  IMDOutput *imd = Node::Object()->imd;
1159  if (imd->getIMD()) {
1160  CUDASequencer->patchData->imd = imd;
1161  }
1162  }
1163 
1164  // Register ComputeCUDAMgrs from each PE into a list for later usage
1165  if(isMasterPe){
1166  // Each masterPE registers its own computeCUDAMgr
1167  CUDASequencer->patchData->cudaBondedList[CkMyPe()] = ComputeCUDAMgr::getComputeCUDAMgr()->getComputeBondedCUDA();
1168  CUDASequencer->patchData->cudaNonbondedList[CkMyPe()] = ComputeCUDAMgr::getComputeCUDAMgr()->getCudaComputeNonbonded();
1169  }else{
1170  CUDASequencer->patchData->cudaBondedList[CkMyPe()] = NULL;
1171  CUDASequencer->patchData->cudaNonbondedList[CkMyPe()] = NULL;
1172  }
1173 
1174  if (isMasterPe) {
1176  if(dofull && deviceCUDA->getIsPmeDevice()){
1177  CudaPmeOneDevice* cudaPme = 0;
1178  cudaPme = cudaMgr->createCudaPmeOneDevice();
1179  }
1180  }
1181 
1183 
1184 /* JM NOTE: This contains the first calls of the integration loop. The order is:
1185  * 1 - Rattle (0,0)
1186  * 2 - runComputeObjects
1187  * 3 - addForceToMomentum(-0.5, tstep)
1188  * 4 - Rattle (-timestep, 0);
1189  * 5 - submitHalfstep();
1190  * 6 - addForceToMomentum(1.0 , tstep)
1191  * 7 - Rattle (tstep, 1)
1192  * 8 - SubmitHalf()
1193  * 9 - addForceToMomentum(-0.5, tstep)
1194  * 10 - submitReductions()
1195  */
1196 
1197  if(scriptTask == SCRIPT_RUN){
1198  updateDeviceData(1, maxForceUsed, doGlobal);
1199 
1200  if(isMasterPe) {
1201  if(patchData->updateCounter.load()>0)
1202  {
1203  CUDASequencer->updateDeviceKernels();
1204  }
1205 
1206  // warm_up1 is basically rattle1_SOA(0,0)
1207  CUDASequencer->startRun1(maxForceUsed, this->patch->lattice);
1208  (this->patch->flags.sequence)++;
1209  if (deviceCUDA->getIsMasterDevice()){
1210  CUDASequencer->patchData->lat = &(this->patch->lattice);
1211  CUDASequencer->patchData->flags.copyIntFlags(this->patch->flags);
1212  }
1214  const auto cudaGlobalMasterObject = cudaMgr->getCudaGlobalMaster();
1215  const bool addCudaGlobalForces =
1216  (cudaGlobalMasterObject != nullptr) ?
1217  cudaGlobalMasterObject->willAddGlobalForces() :
1218  false;
1219  if (addCudaGlobalForces) {
1220  CUDASequencer->allocateGPUSavedForces();
1221  }
1222  }
1223 
1225  if (!simParams->useDeviceMigration) {
1226  wakeULTs(); // Wakes everyone back up for migration
1227  this->patch->positionsReady_SOA(1);
1228  if(CUDASequencer->numPatchesCheckedIn < patchMap->numPatchesOnNode(CkMyPe()) -1 ) {
1229  CUDASequencer->masterThreadSleeping = true;
1230  CUDASequencer->masterThread = CthSelf();
1231  CthSuspend();
1232  }
1234  updateDeviceData(0, maxForceUsed, doGlobal);
1235  } else {
1236  doMigrationGPU(1, doGlobal, simParams->updateAtomMap);
1237  }
1251  if (isMasterPe) {
1252  CUDASequencer->setRescalePairlistTolerance(step < numberOfSteps);
1253  }
1255  // I've migrated everything. Now run computes
1256  runComputeObjectsCUDA(/*isMigration = */ 1 ,
1257  doGlobal,
1258  /* step < numberofSteps */ 1,
1259  /* step = */ 0,
1260  /* startup = */ 1);
1261 
1262  if(isMasterPe){
1263  CUDASequencer->finish_patch_flags(true);
1265  const auto cudaGlobalMasterObject = cudaMgr->getCudaGlobalMaster();
1266  const bool addCudaGlobalForces =
1267  (cudaGlobalMasterObject != nullptr) ?
1268  cudaGlobalMasterObject->willAddGlobalForces() :
1269  false;
1270  CUDASequencer->startRun2(timestep,
1271  nbondstep, slowstep, this->patch->lattice.origin(),
1272  doGlobal || addCudaGlobalForces, maxForceUsed);
1273  }
1275  if(isMasterPe){
1276  const bool requestTotalForces = computeGlobal ? computeGlobal->getForceSendActive() : false;
1278  const auto cudaGlobalMasterObject = cudaMgr->getCudaGlobalMaster();
1279  const bool requestGPUTotalForces =
1280  (cudaGlobalMasterObject != nullptr) ?
1281  cudaGlobalMasterObject->requestedTotalForces() :
1282  false;
1283  CUDASequencer->startRun3(timestep,
1284  nbondstep, slowstep, this->patch->lattice.origin(),
1285  requestTotalForces, doGlobalStaleForces,
1287  requestGPUTotalForces,
1288  maxForceUsed);
1289  }
1290 
1291  // save total force in computeGlobal, forces are copied from device
1292  // to host in startRun3
1293  if (doGlobal) {
1295  // store the total force for compute global clients
1296  int numhp = PatchMap::Object()->numHomePatches();
1298  for(int i = 0; i < numhp; ++i) {
1299  HomePatch *hp = hpList->item(i).patch;
1300  computeGlobal->saveTotalForces(hp);
1301  }
1302  }
1303  }
1304  CUDASequencer->submitReductionValues();
1305  // Allow reductions to finish before calling require in print step
1306  // NOTE: Charm++ based reductions will require thread to yield.
1307  // While the barrier will yield control, it doesn't guarantee that all messages have finished
1309 
1310  // Called everything, now I can go ahead and print the step
1311  // PE 0 needs to handle IO as it owns the controller object
1312  // JM: What happens if PE 0 does not own a GPU here? XXX Check
1313  if(deviceCUDA->getIsMasterDevice()) {
1314  CUDASequencer->patchData->flags.copyIntFlags(this->patch->flags);
1315  c_out->resetMovingAverage();
1316  c_out->printStep(step);
1317  }
1319 
1320  // XXX Should we promote velrescaling into Sequencer in order to save
1321  // the velocity rescaling coefficient between script run commands?
1322  double velrescaling = 1;
1323  // --------- Start of the MD loop ------- //
1324  for( ++step; step <= numberOfSteps; ++step ){
1325  const int imdStep = imdFrequency.check(step);
1326  const int isForcesOutputStep = forceDcdFrequency.check(step) + (doIMD ? imdStep : 0);
1327  int dcdSelectionChecks=0;
1328  Molecule *molecule = Node::Object()->molecule;
1329  for(int dcdindex=0; dcdindex<16;++dcdindex)
1330  {
1331  int dcdSelectionFrequency = molecule->dcdSelectionParams[dcdindex].frequency;
1332  if(dcdSelectionFrequency && step % dcdSelectionFrequency==0)
1333  dcdSelectionChecks++;
1334  }
1335  const int isCollection = restartFrequency.check(step) +
1336  dcdFrequency.check(step) + velDcdFrequency.check(step) +
1337  imdStep + dcdSelectionChecks;
1338  int isMigration = false;
1339  const int doVelocityRescale = stochRescaleFrequency.check(step);
1340  const int doMCPressure = monteCarloPressureFrequency.check(step);
1341  // XXX doVelRescale should instead set a "doTemperature" flag
1342  doEnergy = energyFrequency.check(step) || doVelocityRescale || doMCPressure;
1343  int langevinPistonStep = langevinPistonFrequency.check(step);
1344 
1345  int reassignVelocityStep = reassignVelocityFrequency.check(step);
1346 
1347  // berendsen pressure control
1348  int berendsenPressureStep = 0;
1353  berendsenPressureStep = 1;
1354  }
1355  }
1356  if(patchData->updateCounter.load()>0)
1357  {
1358  CUDASequencer->updateDeviceKernels();
1359  }
1360 
1361  if(doGlobal)
1362  {
1363  globalMasterStep = globalMasterFrequency.check(step);
1364  doGlobalObjects = globalMasterStep? 1:0;
1366  {
1367  doGlobalObjects=1;
1368  doGlobalStaleForces=1;
1369  }
1371  {
1372  doGlobalStaleForces = simParams->globalMasterStaleForces;
1373  }
1375  {
1376  doGlobalStaleForces=doGlobalObjects;
1377  }
1378  else
1379  {
1380  doGlobalStaleForces=doGlobalObjects;
1381  }
1382  }
1383  else
1384  {
1385  doGlobalStaleForces = 0;
1386  doGlobalObjects = 0;
1387  globalMasterStep = false;
1388  }
1389  // CkPrintf("step %d doGlobal %d doGlobalObjects %d doGlobalStaleForces %d globalMasterStep %d globalMasterFrequency %d\n", step, doGlobal, doGlobalObjects, doGlobalStaleForces, globalMasterStep, simParams->globalMasterFrequency);
1390 
1391 #if defined(NAMD_NVTX_ENABLED) || defined(NAMD_CMK_TRACE_ENABLED) || defined(NAMD_ROCTX_ENABLED)
1392  int& eon = patch->flags.event_on;
1393  int epid = (simParams->beginEventPatchID <= patch->getPatchID()
1394  && patch->getPatchID() <= simParams->endEventPatchID);
1395  int beginStep = simParams->beginEventStep;
1396  int endStep = simParams->endEventStep;
1397  bool controlProfiling = eon && epid;
1398  if (controlProfiling && step == beginStep) {
1400  }
1401  if (controlProfiling && step == endStep) {
1403  }
1404 #endif
1405 
1406  Vector origin = this->patch->lattice.origin();
1407  Tensor factor;
1408  if (deviceCUDA->getIsMasterDevice()) {
1409  if (simParams->langevinPistonOn) {
1410  c_out->piston1(step);
1411  }
1412  // Get the rescale factor for berendsen from controller
1414  c_out->berendsenPressureController(step);
1415  }
1416  }
1417 
1419  if (isMasterPe) cudaCheck(cudaDeviceSynchronize());
1420  // NOTE: Charm++ based broadcasts will require thread to yield.
1421  // While the barrier will yield control, it doesn't guarantee that all messages have finished
1423  }
1424  if (langevinPistonStep || berendsenPressureStep) {
1425  factor = broadcast->positionRescaleFactor.get(step, CkNumPes());
1426 
1427  if (isMasterPe) {
1428  this->patch->lattice.rescale(factor);
1429  CUDASequencer->patchData->lat = &(this->patch->lattice);
1430  CUDASequencer->patchData->factor = &(factor);
1431  }
1432  }
1433 
1435  NAMD_EVENT_START(1, NamdProfileEvent::CUDASOA_LAUNCHPT1);
1436  int previousMaxForceUsed;
1437  if(isMasterPe){
1438  // need to remember number of buffers for previous force calculation
1439  previousMaxForceUsed = maxForceUsed;
1440  // update local flags
1441  //doNonbonded = !(step%nonbondedFrequency);
1442  // no need to include doMCPressure since it's common factor of nonbondedFrequency
1443  doNonbonded = nonbondedFrequency.check(step);
1444  // no need to include doMCPressure since it's common factor of fullElectFrequency
1445  doFullElectrostatics = (dofull && fullElectFrequency.check(step));
1446  maxForceUsed = Results::normal;
1447  if ( doNonbonded ) maxForceUsed = Results::nbond;
1448  if ( doFullElectrostatics ) maxForceUsed = Results::slow;
1449 
1450  (this->patch->flags.sequence)++;
1451  // JM: Pressures needed for every timestep if the piston is on
1453 
1454  // copy local flags to global
1455  if(deviceCUDA->getIsMasterDevice()) CUDASequencer->patchData->flags.copyIntFlags(this->patch->flags);
1456  }
1457 
1459 
1460  if(isMasterPe){
1461  CUDASequencer->launch_part1(
1462  step,
1463  timestep, nbondstep, slowstep, velrescaling, maxvel2,
1464  *(CUDASequencer->patchData->factor),
1465  origin,
1466  // this->patch->lattice, // need to use the lattice from PE 0 right now
1467  (langevinPistonStep || berendsenPressureStep) ? *(CUDASequencer->patchData->lat) : this->patch->lattice,
1468  reassignVelocityStep,
1469  langevinPistonStep,
1470  berendsenPressureStep,
1471  previousMaxForceUsed, // call with previous maxForceUsed
1472  (const int)(step == simParams->firstTimestep + 1),
1473  this->patch->flags.savePairlists, // XXX how to initialize?
1474  this->patch->flags.usePairlists, // XXX how to initialize?
1475  doEnergy);
1476  // reset velocity rescaling coefficient after applying it
1477  velrescaling = 1;
1478  }
1479  if (reassignVelocityStep)
1480  {
1481  // CkPrintf("dump after launch_part1\n");
1482  // CUDASequencer->printSOAPositionsAndVelocities(2,10);
1483  }
1484  NAMD_EVENT_STOP(1, NamdProfileEvent::CUDASOA_LAUNCHPT1);
1485 
1487 
1488  if(isMasterPe){
1489  CUDASequencer->launch_part11(
1490  timestep, nbondstep, slowstep, velrescaling, maxvel2,
1491  *(CUDASequencer->patchData->factor),
1492  origin,
1493  // this->patch->lattice, // need to use the lattice from PE 0 right now
1494  (langevinPistonStep || berendsenPressureStep) ? *(CUDASequencer->patchData->lat) : this->patch->lattice,
1495  langevinPistonStep,
1496  previousMaxForceUsed, // call with previous maxForceUsed
1497  (const int)(step == simParams->firstTimestep + 1),
1498  this->patch->flags.savePairlists, // XXX how to initialize?
1499  this->patch->flags.usePairlists, // XXX how to initialize?
1500  doEnergy);
1501  // reset velocity rescaling coefficient after applying it
1502  velrescaling = 1;
1503  }
1504  NAMD_EVENT_STOP(1, NamdProfileEvent::CUDASOA_LAUNCHPT1);
1505 
1507 
1508 
1509  for(int i = 0; i < deviceCUDA->getNumDevice(); i++){
1510  if(CUDASequencer->patchData->migrationFlagPerDevice[i] != 0) {
1511  isMigration = true;
1512  break;
1513  }
1514  }
1515 
1516  if(isMasterPe){
1517  // If this is a Device Migration step we'll do it later
1518  if (!simParams->useDeviceMigration || !isMigration) {
1519  CUDASequencer->launch_set_compute_positions();
1520  }
1521  }
1522 
1523  // isMigration = (CUDASequencer->patchData->migrationFlagPerDevice.end() != t) ? 1:0;
1524 
1525  if(isMasterPe) {
1526  // if(CkMyPe() == 0) CUDASequencer->updatePairlistFlags(isMigration);
1527  CUDASequencer->updatePairlistFlags(isMigration);
1528  if (!simParams->useDeviceMigration) {
1529  CUDASequencer->copyPositionsAndVelocitiesToHost(isMigration, doGlobalObjects);
1530  }
1531  if (simParams->useCudaGlobal && !isMigration) {
1532  // Copy atoms to clients if CudaGlobalMaster is used
1533  // For a migration step, we will do it in runComputeObjectsCUDA
1535  auto cudaGlobal = deviceCUDA->getIsGlobalDevice() ? cudaMgr->getCudaGlobalMaster() : nullptr;
1536  if (cudaGlobal) {
1537  cudaGlobal->setStep(static_cast<int64_t>(patch->flags.step));
1538  cudaGlobal->communicateToClients(&(this->patch->lattice));
1539  }
1540  }
1541  }
1542 
1543 
1544  if(isMigration) {
1545  if (!simParams->useDeviceMigration) {
1547  wakeULTs(); // sets the number of patches
1548  this->patch->positionsReady_SOA(isMigration);
1549  if(CUDASequencer->numPatchesCheckedIn < patchMap->numPatchesOnNode(CkMyPe()) -1 ) {
1550  CUDASequencer->masterThreadSleeping = true;
1551  CUDASequencer->masterThread = CthSelf();
1552  CthSuspend(); // suspends until everyone else has pinged back. :]
1553  }
1555  updateDeviceData(0, maxForceUsed, doGlobal);
1556  } else {
1557  doMigrationGPU(false, doGlobal, simParams->updateAtomMap);
1559  }
1560  }
1561 
1562  // Calculate force/energy for bond, nonBond, pme.
1563 
1564  this->runComputeObjectsCUDA(isMigration, doGlobalObjects, step<numberOfSteps, step, 0 /* startup */);
1565 
1566  if (isMasterPe) {
1567  // if(CkMyPe() == 0) CUDASequencer->finish_patch_flags(isMigration);
1568  CUDASequencer->finish_patch_flags(isMigration);
1569  CUDASequencer->patchData->migrationFlagPerDevice[deviceCUDA->getDeviceIndex()] = 0; // flags it back to zero
1570  }
1572 
1573  NAMD_EVENT_START(1, NamdProfileEvent::CUDASOA_LAUNCHPT2);
1574  if(isMasterPe){
1576  const auto cudaGlobalMasterObject = cudaMgr->getCudaGlobalMaster();
1577  const bool addCudaGlobalForces =
1578  (cudaGlobalMasterObject != nullptr) ?
1579  cudaGlobalMasterObject->willAddGlobalForces() :
1580  false;
1581  CUDASequencer->launch_part2(doMCPressure,
1582  timestep, nbondstep, slowstep,
1583  origin,
1584  step,
1585  maxForceUsed,
1586  langevinPistonStep,
1587  isMigration && (!simParams->useDeviceMigration),
1588  isCollection,
1589  doGlobalStaleForces || addCudaGlobalForces,
1590  doEnergy);
1591  }
1593  NAMD_EVENT_STOP(1, NamdProfileEvent::CUDASOA_LAUNCHPT2);
1594 
1595  // Apply MC pressure control
1596  if(doMCPressure){
1597  monteCarloPressureControl(step, isMigration, 1, 1, maxForceUsed, doGlobalStaleForces);
1599  }
1600 
1601  const bool requestTotalForces = (computeGlobal ? computeGlobal->getForceSendActive() : false) && doGlobalObjects;
1602  // continue launch_part2, after cellBasis fluctuation in MC barostat
1603  if(isMasterPe){
1605  const auto CudaGlobalMasterObject = cudaMgr->getCudaGlobalMaster();
1606  const bool requestGPUTotalForces =
1607  (CudaGlobalMasterObject != nullptr) ?
1608  CudaGlobalMasterObject->requestedTotalForces() :
1609  false;
1610  CUDASequencer->launch_part3(doMCPressure,
1611  timestep, nbondstep, slowstep,
1612  origin,
1613  step,
1614  maxForceUsed,
1615  requestTotalForces, // requested Force
1616  doGlobalStaleForces,
1617  requestGPUTotalForces,
1618  isMigration,
1619  isCollection,
1620  doEnergy,
1621  isForcesOutputStep);
1622  }
1624  NAMD_EVENT_STOP(1, NamdProfileEvent::CUDASOA_LAUNCHPT2);
1625 
1626  // save total force in computeGlobal, forces are copied from device
1627  // to host in launch_part3
1628  if (requestTotalForces) {
1630  // store the total force for compute global clients
1631  int numhp = PatchMap::Object()->numHomePatches();
1633  for(int i = 0; i < numhp; ++i) {
1634  HomePatch *hp = hpList->item(i).patch;
1635  computeGlobal->saveTotalForces(hp);
1636  }
1637  }
1638 
1639  CUDASequencer->submitReductionValues();
1640 
1641  NAMD_EVENT_START(1, NamdProfileEvent::CUDASOA_PRTSTEP);
1642  // Allow reductions to finish before calling require in print step
1643  // NOTE: Charm++ based reductions will require thread to yield.
1644  // While the barrier will yield control, it doesn't guarantee that all messages have finished
1646 
1647  if (deviceCUDA->getIsMasterDevice()) {
1648  // even though you're not on a printstep, calling this still takes 15us approx!!!
1649  c_out->printStep(step);
1650  // stochastic velocity rescaling
1651  // get coefficient from current temperature
1652  // to be applied on NEXT loop iteration
1653  if (doVelocityRescale) {
1654  // calculate coefficient based on current temperature
1655  velrescaling = c_out->stochRescaleCoefficient();
1656  broadcast->stochRescaleCoefficient.publish(step, velrescaling);
1657  }
1658  }
1659  // Non-master PEs should get the rescale factor here.
1660  if (doVelocityRescale) {
1661  // Allow charm++ broadcast to happen and sync
1662  // NOTE: Charm++ based broadcasts will require thread to yield.
1663  // While the barrier will yield control, it doesn't guarantee that all messages have finished
1665  velrescaling = broadcast->stochRescaleCoefficient.get(step, CkNumPes());
1666  }
1667  NAMD_EVENT_STOP(1, NamdProfileEvent::CUDASOA_PRTSTEP);
1668 
1669  NAMD_EVENT_START(1, NamdProfileEvent::CUDASOA_SUBCOL);
1670  if (isCollection) {
1673  if (isMasterPe) {
1674  CUDASequencer->copyAoSDataToHost();
1675  }
1676  // Make sure the data has been copied to all home patches. All PEs
1677  // participate in outputting
1679  }
1680  HomePatchList *hplist = patchMap->homePatchList();
1681  for (auto i= hplist->begin(); i != hplist->end(); i++) {
1682  HomePatch *hp = i->patch;
1683  hp->sequencer->submitCollections_SOA(step);
1684  }
1685 
1686  // Allow for collections to finish
1687  // NOTE: Charm++ based reductions will require thread to yield.
1688  // While the barrier will yield control, it doesn't guarantee that all messages have finished
1690  }
1691  NAMD_EVENT_STOP(1, NamdProfileEvent::CUDASOA_SUBCOL);
1692  }
1693 
1696  if (isMasterPe) {
1697  CUDASequencer->copyAoSDataToHost();
1698  CUDASequencer->updateHostPatchDataSOA();
1699  CUDASequencer->saveForceCUDASOA_direct(false, true, maxForceUsed);
1700  }
1702  // Ensure that the SoA data is also fresh to avoid unforeseen issues. sort_solvent_atoms should not actually
1703  // order atoms, but ensure that the solute/solvent counts are accurate
1704  HomePatchList *hplist = patchMap->homePatchList();
1705  for (auto i= hplist->begin(); i != hplist->end(); i++) {
1706  HomePatch *hp = i->patch;
1707  hp->sort_solvent_atoms();
1708  hp->copy_atoms_to_SOA();
1709  hp->copy_forces_to_AOS(); // to support "output withforces"
1710  }
1711  } else {
1712  if(isMasterPe) {
1713  CUDASequencer->updateHostPatchDataSOA();
1714  CUDASequencer->saveForceCUDASOA_direct(false, true, maxForceUsed);
1715  }
1716  if(isMasterPe) CUDASequencer->copyPositionsAndVelocitiesToHost(true,doGlobal);
1718  HomePatchList *hplist = patchMap->homePatchList();
1719  for (auto i= hplist->begin(); i != hplist->end(); i++) {
1720  HomePatch *hp = i->patch;
1721  hp->copy_updates_to_AOS();
1722  hp->copy_forces_to_AOS(); // to support "output withforces"
1723  }
1724  }
1725  syncColl->barrier(SynchronousCollectiveScope::all); // Make sure the data has been copied to all home patches
1726 
1727  //syncColl->barrier(SynchronousCollectiveScope::all);
1728  CUDASequencer->breakSuspends = true;
1729  wakeULTs();
1730  if(deviceCUDA->getIsMasterDevice()) c_out->awaken();
1731 }
1732 
1733 
1734 /*
1735  * Updates device data after a migration
1736  *
1737  */
1738 void Sequencer::updateDeviceData(const int startup, const int maxForceUsed, const int doGlobal) {
  // Refreshes device-side atom/patch buffers after an atom migration (or at
  // startup when startup != 0).  The master PE for the device copies patch
  // metadata and AoS atom data; other PEs copy their SoA atom data.
  //
  // NOTE(review): this rendered listing omits several original source lines
  // (e.g. 1740, 1743, 1754, 1757, 1765), so the brace/conditional structure
  // visible here is incomplete.  Verify against the original Sequencer.C
  // before modifying control flow in this function.
1739  bool isMaster = deviceCUDA->getMasterPe() == CkMyPe();
1741  if (isMaster) {
  // Master PE path: copy per-patch records, then (presumably on the
  // device-migration path — TODO confirm, enclosing condition is omitted
  // from this listing) reallocate destination buffers and push AoS atoms.
1742  CUDASequencer->copyPatchData(true, startup);
1744  CUDASequencer->reallocateMigrationDestination();
1745  CUDASequencer->copyAtomDataToDeviceAoS();
1746  } else {
  // Non-master PEs only push their atom data for the force buffers in use.
1747  CUDASequencer->copyAtomDataToDevice(startup, maxForceUsed);
1748  }
  // Post-migration bookkeeping shared by both paths.
1749  CUDASequencer->migrationLocalPost(startup);
1750  CUDASequencer->migrationUpdateAdvancedFeatures(startup);
1751  // XXX This is only necessary if reallocation happens
1752  CUDASequencer->registerSOAPointersToHost();
1753  }
1755  if (isMaster) {
1756  CUDASequencer->copySOAHostRegisterToDevice();
  // Clear the per-device reallocation flag now that buffers are consistent.
1758  CUDASequencer->patchData->atomReallocationFlagPerDevice[deviceCUDA->getDeviceIndex()] = 0;
1759  }
1760 
  // Host-side SoA patch data is only needed when forces are sent back to
  // global compute clients (TCL/Colvars/IMD) or force-DCD output is active.
1761  if (doGlobal || simParams->forceDcdFrequency > 0) {
1762  CUDASequencer->updateHostPatchDataSOA(); // Needs to be called after HomePatch::domigration
1763  }
1764  }
1766 }
1767 
1768 /*
1769  * Constructs the meta data structures storing the patch data for GPU resident code path
1770  *
1771  * This is called once during startup
1772  *
1773  */
  // Body of Sequencer::constructDevicePatchMap() (signature line is omitted
  // from this rendered listing).  Builds, once per PE at startup, the
  // device-local patch tables (h_localPatches) and the cross-device peer
  // records (h_peerPatches) used by the GPU-resident code path.
  // NOTE(review): original lines 1796, 1847, and 1892 are missing from this
  // listing (likely barriers/blank lines); consult the original Sequencer.C.
1776  ComputeBondedCUDA* cudaBond = cudaMgr->getComputeBondedCUDA();
1777  CudaComputeNonbonded* cudaNbond = cudaMgr->getCudaComputeNonbonded();
1778 
1779  CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
1780  patchData = cpdata.ckLocalBranch();
1781 
1782  const bool isMasterPe = deviceCUDA->getMasterPe() == CkMyPe();
1783 
1784  // constructDevicePatchMap should only be called once per PE
1785  if (patchData->devicePatchMapFlag[CkMyPe()]) return;
1786  patchData->devicePatchMapFlag[CkMyPe()] = 1;
1787 
1788  // One thread per GPU will execute this block
1789  if (isMasterPe) {
1790  const int deviceIndex = deviceCUDA->getDeviceIndex();
1791 
1792  // Nonbonded patches are computed by CudaComputeNonbonded and contain all the patches and proxy
1793  // patches on this device. HomePatches is computed by SequencerCUDA and only contains the
1794  // home patches. localPatches will be generated by this function
1795  using NBPatchRecord = CudaComputeNonbonded::PatchRecord;
1797  std::vector<NBPatchRecord>& nonbondedPatches = cudaNbond->getPatches();
1798  std::vector<HomePatch*>& homePatches = patchData->devData[deviceIndex].patches;
1799  std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;
1800 
1801  // The home patches are not necessarily ordered by their patchID. This can happen if there
1802  // are multiple PEs assigned to the same GPU. Sorting the home patches by their patch ID
1803  // makes it easy to have a consistent ordering
1804  std::stable_sort(
1805  homePatches.begin(),
1806  homePatches.end(),
1807  [](HomePatch* a, HomePatch* b) {
1808  return (a->getPatchID() < b->getPatchID());
1809  });
1810 
1811  // Iterates over all the patches on this device and adds them to h_localPatches
1812  // and determine if they are a home or proxy patch
1813  for (int i = 0; i < nonbondedPatches.size(); i++) {
1814  CudaLocalRecord record;
1815  record.patchID = nonbondedPatches[i].patchID;
1816 
1817  // TODO DMC the patchmap should be able to do this
  // Linear search over home patches: a patch absent from homePatches is a
  // proxy patch on this device.  O(home * all) but runs only at startup.
1818  const int targetPatchID = record.patchID;
1819  auto result = std::find_if(
1820  homePatches.begin(),
1821  homePatches.end(),
1822  [targetPatchID](HomePatch* p) {
1823  return (p->getPatchID() == targetPatchID);
1824  });
1825 
1826  record.isProxy = (result == homePatches.end());
1827  localPatches.push_back(record);
1828  }
1829 
1830  // The home patches should be at the beginning of the patch list
1831  // This makes integration easier since we can ignore the patches and operate on a
1832  // contiguous chunk of home atoms
  // stable_sort with (isProxy < isProxy) preserves the relative order of
  // home patches while pushing all proxies to the tail.
1833  std::stable_sort(
1834  localPatches.begin(),
1835  localPatches.end(),
1836  [](CudaLocalRecord a, CudaLocalRecord b) {
1837  return (a.isProxy < b.isProxy);
1838  });
1839 
1840  // Now the ordering is fixed we can update the bonded and nonbonded orders. Since we have
1841  // moved the home patches to the beginning the ordering has changed
1842  cudaBond->updatePatchOrder(localPatches);
1843  cudaNbond->updatePatchOrder(localPatches);
1844  patchData->devData[deviceIndex].numPatchesHome = homePatches.size();
1845  patchData->devData[deviceIndex].numPatchesHomeAndProxy = localPatches.size();
1846  }
1848 
1849  // Iterates over all patches again, and generates the mapping between GPUs. For each patch,
1850  // it checks the other devices to see if the patch is on that device.
1851  // - For HomePatches, there will be a peer record for all of its proxies
1852  // - For ProxyPatches, there will only be a peer record for its home patch
1853  // There is a single array of peer records per device. Each patch stores an offset into this
1854  // array as well as its number of peer records
  // NOTE(review): this pass reads h_localPatches of *other* devices
  // (devData[devIdx]); presumably an inter-device barrier between the two
  // isMasterPe blocks (an omitted line in this listing) guarantees those
  // tables are fully built before this point — confirm in the original file.
1855  if (isMasterPe) {
1856  const int deviceIndex = deviceCUDA->getDeviceIndex();
1857  std::vector<CudaPeerRecord>& myPeerPatches = patchData->devData[deviceIndex].h_peerPatches;
1858  std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;
1859 
1860  for (int i = 0; i < localPatches.size(); i++) {
1861  std::vector<CudaPeerRecord> tempPeers;
1862  const int targetPatchID = localPatches[i].patchID;
1863  const int targetIsProxy = localPatches[i].isProxy;
1864 
1865  for (int devIdx = 0; devIdx < deviceCUDA->getNumDevice(); devIdx++) {
1866  if (devIdx == deviceIndex) continue;
1867  std::vector<CudaLocalRecord>& peerPatches = patchData->devData[devIdx].h_localPatches;
1868 
1869  // Searches peerPatches for patchID. If it is not being integrated on this device
1870  // then ignore other non-integration patches
  // Match requires opposite isProxy: a home patch pairs with its proxies,
  // a proxy pairs only with its home patch; at most one match per device.
1871  for (int j = 0; j < patchData->devData[devIdx].numPatchesHomeAndProxy; j++) {
1872  const CudaLocalRecord peer = peerPatches[j];
1873  if (peer.patchID == targetPatchID && peer.isProxy != targetIsProxy) {
1874  CudaPeerRecord peerRecord;
1875  peerRecord.deviceIndex = devIdx;
1876  peerRecord.patchIndex = j;
1877  tempPeers.push_back(peerRecord);
1878  break;
1879  }
1880  }
1881  }
1882 
1883  // Once we have the list of peer records, add them to the single device-width vector
1884  // and record the offset and count
1885  localPatches[i].numPeerRecord = tempPeers.size();
1886  if (!tempPeers.empty()) {
1887  localPatches[i].peerRecordStartIndex = myPeerPatches.size();
1888  myPeerPatches.insert(myPeerPatches.end(), tempPeers.begin(), tempPeers.end());
1889  }
1890  }
1891  }
1893 }
1894 
  // Body of Sequencer::printDevicePatchMap() (signature line is omitted from
  // this rendered listing).  Debug helper: each device's master PE prints its
  // home and proxy patch IDs, serialized by printlock so device outputs do
  // not interleave.
1896  CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
1897  patchData = cpdata.ckLocalBranch();
1898 
1899  const bool isMasterPe = deviceCUDA->getMasterPe() == CkMyPe();
1900 
1901  if (isMasterPe) {
1902  const int deviceIndex = deviceCUDA->getDeviceIndex();
1903  const int numPatchesHome = patchData->devData[deviceIndex].numPatchesHome;
1904  std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;
1905 
  // Serialize output across master PEs sharing this node's patchData.
1906  CmiLock(patchData->printlock);
1907  CkPrintf("PE: %d\n", CkMyPe());
1908 
  // NOTE(review): localPatches.size() is a size_t passed to a %d conversion;
  // on LP64 platforms this is a format mismatch.  Suggested fix (not applied
  // here to keep this listing byte-identical): cast to (int) or use %zu.
1909  CkPrintf("[%d] Home patches %d Local patches %d\n", CkMyPe(), numPatchesHome, localPatches.size());
1910 
  // Home patches occupy the front of localPatches (see constructDevicePatchMap).
1911  CkPrintf("Home Patches: ");
1912  for (int i = 0; i < numPatchesHome; i++) {
1913  CkPrintf("%d ", localPatches[i].patchID);
1914  }
1915  CkPrintf("\n");
1916 
  // Proxy patches are the remaining tail entries.
1917  CkPrintf("Proxy Patches: ");
1918  for (int i = numPatchesHome; i < localPatches.size(); i++) {
1919  CkPrintf("%d ", localPatches[i].patchID);
1920  }
1921  CkPrintf("\n");
1922 
1923  CmiUnlock(patchData->printlock);
1924  }
1926 }
1927 
// NOTE(review): the function signature line was lost in doc extraction
// (embedded doc-line 1928 is missing).  The in-body comment below names it
// clearDevicePatchMap -- confirm against the repository source.
1929  CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
1930  patchData = cpdata.ckLocalBranch();
1931 
1932  const bool isMasterPe = deviceCUDA->getMasterPe() == CkMyPe();
1933 
1934  // clearDevicePatchMap should only be called once per PE
// devicePatchMapFlag acts as a once-per-PE latch: bail out if already
// cleared, otherwise mark this PE as done before proceeding.
1935  if (!patchData->devicePatchMapFlag[CkMyPe()]) return;
1936  patchData->devicePatchMapFlag[CkMyPe()] = 0;
1937 
1938  // One thread per GPU will execute this block
1939  if (isMasterPe) {
1940  const int deviceIndex = deviceCUDA->getDeviceIndex();
1941 
1942  using NBPatchRecord = CudaComputeNonbonded::PatchRecord;
1943  std::vector<HomePatch*>& homePatches = patchData->devData[deviceIndex].patches;
1944  std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;
1945  std::vector<CudaPeerRecord>& peerPatches = patchData->devData[deviceIndex].h_peerPatches;
1946 
// Drop this device's patch bookkeeping; vectors are repopulated on the
// next map setup.
1947  homePatches.clear();
1948  localPatches.clear();
1949  peerPatches.clear();
// NOTE(review): embedded doc-line 1950 is missing here (a stripped
// identifier line, possibly clearing a nonbonded patch list via the
// NBPatchRecord alias declared above) -- confirm against the repo.
1951  }
1952 }
1953 
1954 /*
1955  * Updates the metadata structures storing the patch data for the GPU-resident code path.
1956  *
1957  * This is called every migration step. The actual mapping stays the same,
1958  * but the atom counts per patch change.
1959  *
1960  */
// Refreshes per-patch atom counts and buffer offsets after atom migration.
// Runs in four master-PE phases: (1) home-patch atom counts, (2) proxy-patch
// counts copied from the owning device, (3) prefix-sum buffer offsets,
// (4) peer-record offsets plus their inline copies.
// NOTE(review): several embedded doc-lines are missing from this listing
// (1971-1972, 1982, 1995, 2015, 2033, 2062).  The gaps between the
// isMasterPe phases are presumably synchronization barriers so phase N+1
// reads fully-updated data from all devices -- confirm against the repo.
1961 void Sequencer::updateDevicePatchMap(int startup) {
1962  CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
1963  patchData = cpdata.ckLocalBranch();
1964 
1965  const bool isMasterPe = deviceCUDA->getMasterPe() == CkMyPe();
1966 
1967  if (isMasterPe) {
1968  const int deviceIndex = deviceCUDA->getDeviceIndex();
1969  const int numPatchesHome = patchData->devData[deviceIndex].numPatchesHome;
1970  std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;
// NOTE(review): the declaration of cudaMgr (doc-lines 1971-1972) was
// stripped from this listing; cudaNbond comes from the ComputeCUDAMgr.
1973  CudaComputeNonbonded* cudaNbond = cudaMgr->getCudaComputeNonbonded();
1974 
1975  int max_atom_count = 0;
1976  int total_atom_count = 0;
1977 
1978  // Update the atom count of home patches
1979  for (int i = 0; i < numPatchesHome; i++) {
// The patch may live on any PE sharing this device; probe each PE's
// PatchMap until the patch is found.
1980  Patch* patch = NULL;
1981  for(int j = 0; j < deviceCUDA->getNumPesSharingDevice(); j++){
// NOTE(review): doc-line 1982 (the lookup of per-PE PatchMap `pm`) was
// stripped from this listing -- confirm against the repo.
1983  patch = pm->patch(localPatches[i].patchID);
1984  if (patch != NULL) break;
1985  }
1986  if (patch == NULL) NAMD_die("Sequencer: Failed to find patch in updateDevicePatchMap");
1987 
// numAtomsNBPad is the count rounded up to the nonbonded kernel's tile size.
1988  localPatches[i].numAtoms = patch->getNumAtoms();
1989  localPatches[i].numAtomsNBPad = CudaComputeNonbondedKernel::computeAtomPad(localPatches[i].numAtoms);
1990 
// max/total counts are accumulated here; their consumer is not visible in
// this listing (likely on a stripped line) -- TODO confirm.
1991  if (localPatches[i].numAtoms > max_atom_count) max_atom_count = localPatches[i].numAtoms;
1992  total_atom_count += localPatches[i].numAtoms;
1993  }
1994  }
1996 
1997  // Update the proxy patches next, using the home patch atom counts of other devices
1998  if (isMasterPe) {
1999  const int deviceIndex = deviceCUDA->getDeviceIndex();
2000  const int numPatchesHome = patchData->devData[deviceIndex].numPatchesHome;
2001  const int numPatchesHomeAndProxy = patchData->devData[deviceIndex].numPatchesHomeAndProxy;
2002  std::vector<CudaPeerRecord>& peerPatches = patchData->devData[deviceIndex].h_peerPatches;
2003  std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;
2004 
// For each proxy, follow its first peer record back to the owning
// device's local record and copy the authoritative atom counts.
2005  for (int i = numPatchesHome; i < numPatchesHomeAndProxy; i++) {
2006  const int index = localPatches[i].peerRecordStartIndex;
2007  const int devIdx = peerPatches[index].deviceIndex;
2008  const int peerIdx = peerPatches[index].patchIndex;
2009  const CudaLocalRecord peer = patchData->devData[devIdx].h_localPatches[peerIdx];
2010 
2011  localPatches[i].numAtoms = peer.numAtoms;
2012  localPatches[i].numAtomsNBPad = peer.numAtomsNBPad;
2013  }
2014  }
2016 
2017  // Computes the offset for each patch using the atom counts
2018  if (isMasterPe) {
2019  const int deviceIndex = deviceCUDA->getDeviceIndex();
2020  const int numPatchesHomeAndProxy = patchData->devData[deviceIndex].numPatchesHomeAndProxy;
2021  std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;
2022 
// Exclusive prefix sum over atom counts gives each patch's start offset
// into the device-wide atom buffers (plain and NB-padded variants).
2023  int runningOffset = 0;
2024  int runningOffsetNBPad = 0;
2025  // TODO Change to a C++ prefix sum
2026  for (int i = 0; i < numPatchesHomeAndProxy; i++) {
2027  localPatches[i].bufferOffset = runningOffset;
2028  localPatches[i].bufferOffsetNBPad = runningOffsetNBPad;
2029  runningOffset += localPatches[i].numAtoms;
2030  runningOffsetNBPad += localPatches[i].numAtomsNBPad;
2031  }
2032  }
2034 
2035  // Update the peer records using the local record data
2036  if (isMasterPe) {
2037  const int deviceIndex = deviceCUDA->getDeviceIndex();
2038  const int numPatchesHomeAndProxy = patchData->devData[deviceIndex].numPatchesHomeAndProxy;
2039  std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;
2040  std::vector<CudaPeerRecord>& peerPatches = patchData->devData[deviceIndex].h_peerPatches;
2041 
2042 
// Pull the freshly computed buffer offsets from each peer device's local
// records into this device's peer records.
2043  for (int i = 0; i < peerPatches.size(); i++) {
2044  const int devIdx = peerPatches[i].deviceIndex;
2045  const int peerIdx = peerPatches[i].patchIndex;
2046  const CudaLocalRecord peer = patchData->devData[devIdx].h_localPatches[peerIdx];
2047 
2048  peerPatches[i].bufferOffset = peer.bufferOffset;
2049  peerPatches[i].bufferOffsetNBPad = peer.bufferOffsetNBPad;
2050  }
2051 
2052  // Update inline copy of peer data
// Each local record caches up to num_inline_peer peer records inline to
// avoid an indirection on the device; refresh those cached copies here.
2053  for (int i = 0; i < numPatchesHomeAndProxy; i++) {
2054  const int numPeerRecord = localPatches[i].numPeerRecord;
2055  const int peerOffset = localPatches[i].peerRecordStartIndex;
2056 
2057  for (int j = 0; j < std::min(numPeerRecord, CudaLocalRecord::num_inline_peer); j++) {
2058  localPatches[i].inline_peers[j] = peerPatches[peerOffset+j];
2059  }
2060  }
2061  }
2063 }
2064 
2065 #endif
2066 
2067 
// integrate_SOA: velocity-Verlet integration loop for this patch using the
// structure-of-arrays (SOA) layout in patch->patchDataSOA.  Handles multiple
// time stepping (normal / nbond / slow force classes via CheckStep periods),
// rigid-bond constraints (rattle1_SOA), optional Langevin piston splitting of
// the drift, output/restart scheduling, and per-cycle atom migration.
//
// NOTE(review): this listing passed through a doc-extraction step that
// dropped hyperlinked identifier lines (embedded numbering skips 2227, 2272,
// 2308, 2365, 2368, 2377, 2381, 2454, 2508, 2546, 2583, 2611, 2649).  Each
// gap directly before an "#ifndef SOA_SIMPLIFY_PARAMS" argument list was the
// callee's name (half/full-step submit, Langevin piston/BBK1/BBK2, stochastic
// rescale, Berendsen pressure, profiler start/stop) -- confirm against the
// repository source before treating any call site here as complete.
2068 void Sequencer::integrate_SOA(int scriptTask) {
2069  //
2070  // Below when accessing the array buffers for position, velocity, force,
2071  // note that we don't want to set up pointers directly to the buffers
2072  // because the allocations might get resized after atom migration.
2073  //
2074 
2075 #ifdef TIMER_COLLECTION
2076  TimerSet& t = patch->timerSet;
2077 #endif
2078  TIMER_INIT_WIDTH(t, KICK, simParams->timerBinWidth);
2079  TIMER_INIT_WIDTH(t, MAXMOVE, simParams->timerBinWidth);
2080  TIMER_INIT_WIDTH(t, DRIFT, simParams->timerBinWidth);
2081  TIMER_INIT_WIDTH(t, PISTON, simParams->timerBinWidth);
2082  TIMER_INIT_WIDTH(t, SUBMITHALF, simParams->timerBinWidth);
2083  TIMER_INIT_WIDTH(t, VELBBK1, simParams->timerBinWidth);
2084  TIMER_INIT_WIDTH(t, VELBBK2, simParams->timerBinWidth);
2085  TIMER_INIT_WIDTH(t, RATTLE1, simParams->timerBinWidth);
2086  TIMER_INIT_WIDTH(t, SUBMITFULL, simParams->timerBinWidth);
2087  TIMER_INIT_WIDTH(t, SUBMITCOLLECT, simParams->timerBinWidth);
2088 
2089  // Keep track of the step number.
2090  int &step = patch->flags.step;
2091  step = simParams->firstTimestep;
2092 
2093  // For multiple time stepping, which force boxes are used?
2094  int &maxForceUsed = patch->flags.maxForceUsed;
2095  int &maxForceMerged = patch->flags.maxForceMerged;
2096  maxForceUsed = Results::normal;
2097  maxForceMerged = Results::normal;
2098 
2099  // Keep track of total steps and steps per cycle.
2100  const int numberOfSteps = simParams->N;
2101  //const int stepsPerCycle = simParams->stepsPerCycle;
2102  CheckStep stepsPerCycle;
2103  stepsPerCycle.init(step, simParams->stepsPerCycle);
2104  // The fundamental time step, get the scaling right for velocity units.
2105  const BigReal timestep = simParams->dt * RECIP_TIMEFACTOR;
2106 
2107  //const int nonbondedFrequency = simParams->nonbondedFrequency;
2108  //slowFreq = nonbondedFrequency;
2109  CheckStep nonbondedFrequency;
2111  // The step size for short-range nonbonded forces.
2112  const BigReal nbondstep = timestep * simParams->nonbondedFrequency;
2113  int &doNonbonded = patch->flags.doNonbonded;
2114  //doNonbonded = (step >= numberOfSteps) || !(step%nonbondedFrequency);
2115  doNonbonded = (step >= numberOfSteps) ||
2116  nonbondedFrequency.init(step, simParams->nonbondedFrequency);
2117  //if ( nonbondedFrequency == 1 ) maxForceMerged = Results::nbond;
2118  if ( nonbondedFrequency.period == 1 ) maxForceMerged = Results::nbond;
2119  if ( doNonbonded ) maxForceUsed = Results::nbond;
2120 
2121  // Do we do full electrostatics?
2122  const int dofull = ( simParams->fullElectFrequency ? 1 : 0 );
2123  //const int fullElectFrequency = simParams->fullElectFrequency;
2124  //if ( dofull ) slowFreq = fullElectFrequency;
2125  CheckStep fullElectFrequency;
2126  if ( dofull ) slowFreq = simParams->fullElectFrequency;
2127  // The step size for long-range electrostatics.
2128  const BigReal slowstep = timestep * simParams->fullElectFrequency;
2129  int &doFullElectrostatics = patch->flags.doFullElectrostatics;
2130  //doFullElectrostatics = (dofull &&
2131  // ((step >= numberOfSteps) || !(step%fullElectFrequency)));
2132  doFullElectrostatics = (dofull &&
2133  ((step >= numberOfSteps) ||
2134  fullElectFrequency.init(step, simParams->fullElectFrequency)));
2135  //if ( dofull && fullElectFrequency == 1 ) maxForceMerged = Results::slow;
2136  if ( dofull && fullElectFrequency.period == 1 ) maxForceMerged = Results::slow;
2137  if ( doFullElectrostatics ) maxForceUsed = Results::slow;
2138 
2139  // Bother to calculate energies?
2140  int &doEnergy = patch->flags.doEnergy;
2141  //int energyFrequency = simParams->outputEnergies;
2142  CheckStep energyFrequency;
// With alchemy on, energy output must also land on alchOutFreq steps, so
// use the gcd of the two periods.
2143  int newComputeEnergies = simParams->computeEnergies;
2144  if(simParams->alchOn) newComputeEnergies = NAMD_gcd(newComputeEnergies, simParams->alchOutFreq);
2145  doEnergy = energyFrequency.init(step, newComputeEnergies);
2146 
2147  // Do we need to return forces to TCL script or Colvar module?
2148  int doTcl = simParams->tclForcesOn;
2149  int doColvars = simParams->colvarsOn;
2150  int doGlobal = doTcl || doColvars;
2152  int &doVirial = patch->flags.doVirial;
2153  doVirial = 1;
2154 
2155  // The following flags have to be explicitly disabled in Patch object.
2156  patch->flags.doMolly = 0;
2157  patch->flags.doLoweAndersen = 0;
2158  patch->flags.doGBIS = 0;
2159  patch->flags.doLCPO = 0;
2160 
2161  // Square of maximum velocity for simulation safety check
2162  const BigReal maxvel2 =
2163  (simParams->cutoff * simParams->cutoff) / (timestep * timestep);
2164 
2165  // check for Langevin piston
2166  // set period beyond numberOfSteps to disable
2167  CheckStep langevinPistonFrequency;
2168  langevinPistonFrequency.init(step,
2169  (simParams->langevinPistonOn ? slowFreq : numberOfSteps+1 ),
2170  (simParams->langevinPistonOn ? -1-slowFreq/2 : 0) /* = delta */);
2171 
2172  // check for output
2173  // set period beyond numberOfSteps to disable
2174  CheckStep restartFrequency;
2175  restartFrequency.init(step, (simParams->restartFrequency ?
2176  simParams->restartFrequency : numberOfSteps+1) );
2177  CheckStep dcdFrequency;
2178  dcdFrequency.init(step, (simParams->dcdFrequency ?
2179  simParams->dcdFrequency : numberOfSteps+1) );
2180  CheckStep velDcdFrequency;
2181  velDcdFrequency.init(step, (simParams->velDcdFrequency ?
2182  simParams->velDcdFrequency : numberOfSteps+1) );
2183  CheckStep forceDcdFrequency;
2184  forceDcdFrequency.init(step, (simParams->forceDcdFrequency ?
2185  simParams->forceDcdFrequency : numberOfSteps+1) );
2186  CheckStep imdFrequency;
2187  imdFrequency.init(step, (simParams->IMDfreq ?
2188  simParams->IMDfreq : numberOfSteps+1) );
2189 
// Startup sequence for a "run" command: constrain initial positions,
// compute initial forces, then the kick(-0.5)/kick(1.0)/kick(-0.5) pattern
// establishes half-step velocities for the reductions while leaving the
// on-step velocities intact.
2190  if ( scriptTask == SCRIPT_RUN ) {
2191  // enforce rigid bond constraints on initial positions
2192  TIMER_START(t, RATTLE1);
2193  rattle1_SOA(0., 0);
2194  TIMER_STOP(t, RATTLE1);
2195 
2196  // must migrate here!
2197  int natoms = patch->patchDataSOA.numAtoms;
2198  runComputeObjects_SOA(1, step<numberOfSteps, step);
2199  // kick -0.5
2200  TIMER_START(t, KICK);
2201  addForceToMomentum_SOA(-0.5, timestep, nbondstep, slowstep,
2202 #ifndef SOA_SIMPLIFY_PARAMS
2203  patch->patchDataSOA.recipMass,
2204  patch->patchDataSOA.f_normal_x,
2205  patch->patchDataSOA.f_normal_y,
2206  patch->patchDataSOA.f_normal_z,
2207  patch->patchDataSOA.f_nbond_x,
2208  patch->patchDataSOA.f_nbond_y,
2209  patch->patchDataSOA.f_nbond_z,
2210  patch->patchDataSOA.f_slow_x,
2211  patch->patchDataSOA.f_slow_y,
2212  patch->patchDataSOA.f_slow_z,
2213  patch->patchDataSOA.vel_x,
2214  patch->patchDataSOA.vel_y,
2215  patch->patchDataSOA.vel_z,
2216  patch->patchDataSOA.numAtoms,
2217 #endif
2218  maxForceUsed
2219  );
2220  TIMER_STOP(t, KICK);
2221 
2222  TIMER_START(t, RATTLE1);
2223  rattle1_SOA(-timestep, 0);
2224  TIMER_STOP(t, RATTLE1);
2225 
// NOTE(review): callee name stripped here (doc-line 2227); by pattern this
// is the half-step kinetic submission -- confirm against the repo.
2226  TIMER_START(t, SUBMITHALF);
2228 #ifndef SOA_SIMPLIFY_PARAMS
2229  patch->patchDataSOA.hydrogenGroupSize,
2230  patch->patchDataSOA.mass,
2231  patch->patchDataSOA.vel_x,
2232  patch->patchDataSOA.vel_y,
2233  patch->patchDataSOA.vel_z,
2234  patch->patchDataSOA.numAtoms
2235 #endif
2236  );
2237  TIMER_STOP(t, SUBMITHALF);
2238 
2239  // kick 1.0
2240  TIMER_START(t, KICK);
2241  addForceToMomentum_SOA(1.0, timestep, nbondstep, slowstep,
2242 #ifndef SOA_SIMPLIFY_PARAMS
2243  patch->patchDataSOA.recipMass,
2244  patch->patchDataSOA.f_normal_x,
2245  patch->patchDataSOA.f_normal_y,
2246  patch->patchDataSOA.f_normal_z,
2247  patch->patchDataSOA.f_nbond_x,
2248  patch->patchDataSOA.f_nbond_y,
2249  patch->patchDataSOA.f_nbond_z,
2250  patch->patchDataSOA.f_slow_x,
2251  patch->patchDataSOA.f_slow_y,
2252  patch->patchDataSOA.f_slow_z,
2253  patch->patchDataSOA.vel_x,
2254  patch->patchDataSOA.vel_y,
2255  patch->patchDataSOA.vel_z,
2256  patch->patchDataSOA.numAtoms,
2257 #endif
2258  maxForceUsed
2259  );
2260  TIMER_STOP(t, KICK);
2261 
2262  TIMER_START(t, RATTLE1);
2263  rattle1_SOA(timestep, 1);
2264  TIMER_STOP(t, RATTLE1);
2265 
2266  // save total force in computeGlobal
2267  if (doGlobal) {
2268  computeGlobal->saveTotalForces(patch);
2269  }
2270 
2271  TIMER_START(t, SUBMITHALF);
2273 #ifndef SOA_SIMPLIFY_PARAMS
2274  patch->patchDataSOA.hydrogenGroupSize,
2275  patch->patchDataSOA.mass,
2276  patch->patchDataSOA.vel_x,
2277  patch->patchDataSOA.vel_y,
2278  patch->patchDataSOA.vel_z,
2279  patch->patchDataSOA.numAtoms
2280 #endif
2281  );
2282  TIMER_STOP(t, SUBMITHALF);
2283 
2284  // kick -0.5
2285  TIMER_START(t, KICK);
2286  addForceToMomentum_SOA(-0.5, timestep, nbondstep, slowstep,
2287 #ifndef SOA_SIMPLIFY_PARAMS
2288  patch->patchDataSOA.recipMass,
2289  patch->patchDataSOA.f_normal_x,
2290  patch->patchDataSOA.f_normal_y,
2291  patch->patchDataSOA.f_normal_z,
2292  patch->patchDataSOA.f_nbond_x,
2293  patch->patchDataSOA.f_nbond_y,
2294  patch->patchDataSOA.f_nbond_z,
2295  patch->patchDataSOA.f_slow_x,
2296  patch->patchDataSOA.f_slow_y,
2297  patch->patchDataSOA.f_slow_z,
2298  patch->patchDataSOA.vel_x,
2299  patch->patchDataSOA.vel_y,
2300  patch->patchDataSOA.vel_z,
2301  patch->patchDataSOA.numAtoms,
2302 #endif
2303  maxForceUsed
2304  );
2305  TIMER_STOP(t, KICK);
2306 
2307  TIMER_START(t, SUBMITFULL);
2309 #ifndef SOA_SIMPLIFY_PARAMS
2310  patch->patchDataSOA.hydrogenGroupSize,
2311  patch->patchDataSOA.mass,
2312  patch->patchDataSOA.pos_x,
2313  patch->patchDataSOA.pos_y,
2314  patch->patchDataSOA.pos_z,
2315  patch->patchDataSOA.vel_x,
2316  patch->patchDataSOA.vel_y,
2317  patch->patchDataSOA.vel_z,
2318  patch->patchDataSOA.f_normal_x,
2319  patch->patchDataSOA.f_normal_y,
2320  patch->patchDataSOA.f_normal_z,
2321  patch->patchDataSOA.f_nbond_x,
2322  patch->patchDataSOA.f_nbond_y,
2323  patch->patchDataSOA.f_nbond_z,
2324  patch->patchDataSOA.f_slow_x,
2325  patch->patchDataSOA.f_slow_y,
2326  patch->patchDataSOA.f_slow_z,
2327  patch->patchDataSOA.numAtoms
2328 #endif
2329  );
2330  TIMER_STOP(t, SUBMITFULL);
2331 
2332  rebalanceLoad(step);
2333  } // scriptTask == SCRIPT_RUN
2334 
2335 #if defined(NAMD_NVTX_ENABLED) || defined(NAMD_CMK_TRACE_ENABLED) || defined(NAMD_ROCTX_ENABLED)
2336  int& eon = patch->flags.event_on;
2337  int epid = (simParams->beginEventPatchID <= patch->getPatchID()
2338  && patch->getPatchID() <= simParams->endEventPatchID);
2339  int beginStep = simParams->beginEventStep;
2340  int endStep = simParams->endEventStep;
2341  bool controlProfiling = (patch->getPatchID() >= simParams->beginEventPatchID && patch->getPatchID() <= simParams->endEventPatchID);
2342 #endif
2343 
// Main integration loop.  The pre-increment advances past firstTimestep,
// whose forces were already handled in the startup block above.
2344  for ( ++step; step <= numberOfSteps; ++step ) {
// Count how many of the 16 DCD atom-selection outputs trigger this step.
2345  int dcdSelectionChecks=0;
2346  Molecule *molecule = Node::Object()->molecule;
2347  for(int dcdindex=0; dcdindex<16;++dcdindex)
2348  {
2349  int dcdSelectionFrequency = molecule->dcdSelectionParams[dcdindex].frequency;
2350  if(dcdSelectionFrequency && step % dcdSelectionFrequency==0)
2351  dcdSelectionChecks++;
2352  }
// Sum (not short-circuit) all output triggers so each CheckStep advances.
2353  const int isCollection = restartFrequency.check(step) +
2354  dcdFrequency.check(step) + velDcdFrequency.check(step) +
2355  forceDcdFrequency.check(step) + imdFrequency.check(step) +
2356  dcdSelectionChecks;
2357  const int isMigration = stepsPerCycle.check(step);
2358  doEnergy = energyFrequency.check(step);
2359  DebugM(3,"doGlobal now "<< doGlobal<<"\n"<<endi);
2360 
2361 #if defined(NAMD_NVTX_ENABLED) || defined(NAMD_CMK_TRACE_ENABLED) || defined(NAMD_ROCTX_ENABLED)
2362  eon = epid && (beginStep < step && step <= endStep);
2363 
// NOTE(review): profiler start/stop call lines (doc-lines 2365, 2368)
// were stripped from this listing.
2364  if (controlProfiling && step == beginStep) {
2366  }
2367  if (controlProfiling && step == endStep) {
2369  }
2370 // NAMD_EVENT_START(eon, NamdProfileEvent::INTEGRATE_SOA_1);
2371  char buf[32];
2372  sprintf(buf, "%s: %d", NamdProfileEventStr[NamdProfileEvent::INTEGRATE_SOA_1], patch->getPatchID());
2373  NAMD_EVENT_START_EX(eon, NamdProfileEvent::INTEGRATE_SOA_1, buf);
2374 #endif
2375 
// NOTE(review): thermostat / barostat callee names stripped below
// (doc-lines 2377 and 2381).
2376  if ( simParams->stochRescaleOn ) {
2378  }
2379 
2380  if ( simParams->berendsenPressureOn ) {
2382 #ifndef SOA_SIMPLIFY_PARAMS
2383  patch->patchDataSOA.hydrogenGroupSize,
2384  patch->patchDataSOA.mass,
2385  patch->patchDataSOA.pos_x,
2386  patch->patchDataSOA.pos_y,
2387  patch->patchDataSOA.pos_z,
2388  patch->patchDataSOA.numAtoms,
2389 #endif
2390  step);
2391  }
2392 
2393  // kick 0.5
2394  TIMER_START(t, KICK);
2395  addForceToMomentum_SOA(0.5, timestep, nbondstep, slowstep,
2396 #ifndef SOA_SIMPLIFY_PARAMS
2397  patch->patchDataSOA.recipMass,
2398  patch->patchDataSOA.f_normal_x,
2399  patch->patchDataSOA.f_normal_y,
2400  patch->patchDataSOA.f_normal_z,
2401  patch->patchDataSOA.f_nbond_x,
2402  patch->patchDataSOA.f_nbond_y,
2403  patch->patchDataSOA.f_nbond_z,
2404  patch->patchDataSOA.f_slow_x,
2405  patch->patchDataSOA.f_slow_y,
2406  patch->patchDataSOA.f_slow_z,
2407  patch->patchDataSOA.vel_x,
2408  patch->patchDataSOA.vel_y,
2409  patch->patchDataSOA.vel_z,
2410  patch->patchDataSOA.numAtoms,
2411 #endif
2412  maxForceUsed
2413  );
2414  TIMER_STOP(t, KICK);
2415 
2416  // maximumMove checks velocity bound on atoms
2417  TIMER_START(t, MAXMOVE);
2418  maximumMove_SOA(timestep, maxvel2
2419 #ifndef SOA_SIMPLIFY_PARAMS
2420  ,
2421  patch->patchDataSOA.vel_x,
2422  patch->patchDataSOA.vel_y,
2423  patch->patchDataSOA.vel_z,
2424  patch->patchDataSOA.numAtoms
2425 #endif
2426  );
2427  TIMER_STOP(t, MAXMOVE);
2428 
2429 
2430  NAMD_EVENT_STOP(eon, NamdProfileEvent::INTEGRATE_SOA_1);
2431 
2432  // Check to see if Langevin piston is enabled this step:
2433  // ! ((step-1-slowFreq/2) % slowFreq)
// Piston steps split the drift into two half-drifts around the cell
// rescale so positions see the piston update mid-step.
2434  if ( langevinPistonFrequency.check(step) ) {
2435  // if (langevinPistonStep) {
2436  // drift 0.5
2437  TIMER_START(t, DRIFT);
2438  addVelocityToPosition_SOA(0.5*timestep
2439 #ifndef SOA_SIMPLIFY_PARAMS
2440  ,
2441  patch->patchDataSOA.vel_x,
2442  patch->patchDataSOA.vel_y,
2443  patch->patchDataSOA.vel_z,
2444  patch->patchDataSOA.pos_x,
2445  patch->patchDataSOA.pos_y,
2446  patch->patchDataSOA.pos_z,
2447  patch->patchDataSOA.numAtoms
2448 #endif
2449  );
2450  TIMER_STOP(t, DRIFT);
2451  // There is a blocking receive inside of langevinPiston()
2452  // that might suspend the current thread of execution,
2453  // so split profiling around this conditional block.
// NOTE(review): the langevinPiston call line (doc-line 2454) was stripped.
2455 #ifndef SOA_SIMPLIFY_PARAMS
2456  patch->patchDataSOA.hydrogenGroupSize,
2457  patch->patchDataSOA.mass,
2458  patch->patchDataSOA.pos_x,
2459  patch->patchDataSOA.pos_y,
2460  patch->patchDataSOA.pos_z,
2461  patch->patchDataSOA.vel_x,
2462  patch->patchDataSOA.vel_y,
2463  patch->patchDataSOA.vel_z,
2464  patch->patchDataSOA.numAtoms,
2465 #endif
2466  step
2467  );
2468 
2469  // drift 0.5
2470  TIMER_START(t, DRIFT);
2471  addVelocityToPosition_SOA(0.5*timestep
2472 #ifndef SOA_SIMPLIFY_PARAMS
2473  ,
2474  patch->patchDataSOA.vel_x,
2475  patch->patchDataSOA.vel_y,
2476  patch->patchDataSOA.vel_z,
2477  patch->patchDataSOA.pos_x,
2478  patch->patchDataSOA.pos_y,
2479  patch->patchDataSOA.pos_z,
2480  patch->patchDataSOA.numAtoms
2481 #endif
2482  );
2483  TIMER_STOP(t, DRIFT);
2484  }
2485  else {
2486  // drift 1.0
2487  TIMER_START(t, DRIFT);
2488  addVelocityToPosition_SOA(timestep
2489 #ifndef SOA_SIMPLIFY_PARAMS
2490  ,
2491  patch->patchDataSOA.vel_x,
2492  patch->patchDataSOA.vel_y,
2493  patch->patchDataSOA.vel_z,
2494  patch->patchDataSOA.pos_x,
2495  patch->patchDataSOA.pos_y,
2496  patch->patchDataSOA.pos_z,
2497  patch->patchDataSOA.numAtoms
2498 #endif
2499  );
2500  TIMER_STOP(t, DRIFT);
2501  }
2502 
2503  //NAMD_EVENT_START(eon, NamdProfileEvent::INTEGRATE_SOA_2);
2504 
2505  // There are NO sends in submitHalfstep() just local summation
2506  // into the Reduction struct.
2507  TIMER_START(t, SUBMITHALF);
2509 #ifndef SOA_SIMPLIFY_PARAMS
2510  patch->patchDataSOA.hydrogenGroupSize,
2511  patch->patchDataSOA.mass,
2512  patch->patchDataSOA.vel_x,
2513  patch->patchDataSOA.vel_y,
2514  patch->patchDataSOA.vel_z,
2515  patch->patchDataSOA.numAtoms
2516 #endif
2517  );
2518  TIMER_STOP(t, SUBMITHALF);
2519 
2520  //doNonbonded = !(step%nonbondedFrequency);
2521  doNonbonded = nonbondedFrequency.check(step);
2522  //doFullElectrostatics = (dofull && !(step%fullElectFrequency));
2523  doFullElectrostatics = (dofull && fullElectFrequency.check(step));
2524 
2525  maxForceUsed = Results::normal;
2526  if ( doNonbonded ) maxForceUsed = Results::nbond;
2527  if ( doFullElectrostatics ) maxForceUsed = Results::slow;
2528 
2529  // Migrate Atoms on stepsPerCycle
2530  // Check to see if this is energy evaluation step:
2531  // doEnergy = ! ( step % energyFrequency );
2532  doVirial = 1;
2533  doKineticEnergy = 1;
2534  doMomenta = 1;
2535 
2536  //NAMD_EVENT_STOP(eon, NamdProfileEvent::INTEGRATE_SOA_2); // integrate_SOA 2
2537 
2538  // The current thread of execution will suspend in runComputeObjects().
2539  // Check to see if we are at a migration step:
2540  // runComputeObjects_SOA(!(step%stepsPerCycle), step<numberOfSteps);
2541  runComputeObjects_SOA(isMigration, step<numberOfSteps, step);
2542 
2543  NAMD_EVENT_START(eon, NamdProfileEvent::INTEGRATE_SOA_3);
2544 
// NOTE(review): BBK1 callee line (doc-line 2546) was stripped.
2545  TIMER_START(t, VELBBK1);
2547  timestep
2548 #ifndef SOA_SIMPLIFY_PARAMS
2549  ,
2550  patch->patchDataSOA.langevinParam,
2551  patch->patchDataSOA.vel_x,
2552  patch->patchDataSOA.vel_y,
2553  patch->patchDataSOA.vel_z,
2554  patch->patchDataSOA.numAtoms
2555 #endif
2556  );
2557  TIMER_STOP(t, VELBBK1);
2558 
2559  // kick 1.0
2560  TIMER_START(t, KICK);
2561  addForceToMomentum_SOA(1.0, timestep, nbondstep, slowstep,
2562 #ifndef SOA_SIMPLIFY_PARAMS
2563  patch->patchDataSOA.recipMass,
2564  patch->patchDataSOA.f_normal_x,
2565  patch->patchDataSOA.f_normal_y,
2566  patch->patchDataSOA.f_normal_z,
2567  patch->patchDataSOA.f_nbond_x,
2568  patch->patchDataSOA.f_nbond_y,
2569  patch->patchDataSOA.f_nbond_z,
2570  patch->patchDataSOA.f_slow_x,
2571  patch->patchDataSOA.f_slow_y,
2572  patch->patchDataSOA.f_slow_z,
2573  patch->patchDataSOA.vel_x,
2574  patch->patchDataSOA.vel_y,
2575  patch->patchDataSOA.vel_z,
2576  patch->patchDataSOA.numAtoms,
2577 #endif
2578  maxForceUsed
2579  );
2580  TIMER_STOP(t, KICK);
2581 
// NOTE(review): BBK2 callee line (doc-line 2583) was stripped.
2582  TIMER_START(t, VELBBK2);
2584  timestep
2585 #ifndef SOA_SIMPLIFY_PARAMS
2586  ,
2587  patch->patchDataSOA.langevinParam,
2588  patch->patchDataSOA.langScalVelBBK2,
2589  patch->patchDataSOA.langScalRandBBK2,
2590  patch->patchDataSOA.gaussrand_x,
2591  patch->patchDataSOA.gaussrand_y,
2592  patch->patchDataSOA.gaussrand_z,
2593  patch->patchDataSOA.vel_x,
2594  patch->patchDataSOA.vel_y,
2595  patch->patchDataSOA.vel_z,
2596  patch->patchDataSOA.numAtoms
2597 #endif
2598  );
2599  TIMER_STOP(t, VELBBK2);
2600 
2601  TIMER_START(t, RATTLE1);
2602  rattle1_SOA(timestep, 1);
2603  TIMER_STOP(t, RATTLE1);
2604 
2605  // save total force in computeGlobal
2606  if (doGlobal) {
2607  computeGlobal->saveTotalForces(patch);
2608  }
2609 
2610  TIMER_START(t, SUBMITHALF);
2612 #ifndef SOA_SIMPLIFY_PARAMS
2613  patch->patchDataSOA.hydrogenGroupSize,
2614  patch->patchDataSOA.mass,
2615  patch->patchDataSOA.vel_x,
2616  patch->patchDataSOA.vel_y,
2617  patch->patchDataSOA.vel_z,
2618  patch->patchDataSOA.numAtoms
2619 #endif
2620  );
2621  TIMER_STOP(t, SUBMITHALF);
2622 
2623  // kick -0.5
2624  TIMER_START(t, KICK);
2625  addForceToMomentum_SOA(-0.5, timestep, nbondstep, slowstep,
2626 #ifndef SOA_SIMPLIFY_PARAMS
2627  patch->patchDataSOA.recipMass,
2628  patch->patchDataSOA.f_normal_x,
2629  patch->patchDataSOA.f_normal_y,
2630  patch->patchDataSOA.f_normal_z,
2631  patch->patchDataSOA.f_nbond_x,
2632  patch->patchDataSOA.f_nbond_y,
2633  patch->patchDataSOA.f_nbond_z,
2634  patch->patchDataSOA.f_slow_x,
2635  patch->patchDataSOA.f_slow_y,
2636  patch->patchDataSOA.f_slow_z,
2637  patch->patchDataSOA.vel_x,
2638  patch->patchDataSOA.vel_y,
2639  patch->patchDataSOA.vel_z,
2640  patch->patchDataSOA.numAtoms,
2641 #endif
2642  maxForceUsed
2643  );
2644  TIMER_STOP(t, KICK);
2645 
2646  // XXX rattle2_SOA(timestep,step);
2647 
2648  TIMER_START(t, SUBMITFULL);
2650 #ifndef SOA_SIMPLIFY_PARAMS
2651  patch->patchDataSOA.hydrogenGroupSize,
2652  patch->patchDataSOA.mass,
2653  patch->patchDataSOA.pos_x,
2654  patch->patchDataSOA.pos_y,
2655  patch->patchDataSOA.pos_z,
2656  patch->patchDataSOA.vel_x,
2657  patch->patchDataSOA.vel_y,
2658  patch->patchDataSOA.vel_z,
2659  patch->patchDataSOA.f_normal_x,
2660  patch->patchDataSOA.f_normal_y,
2661  patch->patchDataSOA.f_normal_z,
2662  patch->patchDataSOA.f_nbond_x,
2663  patch->patchDataSOA.f_nbond_y,
2664  patch->patchDataSOA.f_nbond_z,
2665  patch->patchDataSOA.f_slow_x,
2666  patch->patchDataSOA.f_slow_y,
2667  patch->patchDataSOA.f_slow_z,
2668  patch->patchDataSOA.numAtoms
2669 #endif
2670  );
2671  TIMER_STOP(t, SUBMITFULL);
2672 #ifdef TESTPID
// Debug-only: dump this patch's three force arrays to binary files for
// offline comparison when compiled with TESTPID set to a patch ID.
2673  if (1) {
2674  int pid = TESTPID;
2675  if (patch->patchID == pid) {
2676  const PatchDataSOA& p = patch->patchDataSOA;
2677  int n = p.numAtoms;
2678 #if 0
2679  fprintf(stderr, "Patch %d has %d atoms\n", pid, n);
2680  fprintf(stderr, "%3s %8s %12s %12s %12s\n",
2681  "", "id", "fnormal_x", "fnbond_x", "fslow_x");
2682  for (int i=0; i < n; i++) {
2683  int index = p.id[i];
2684  fprintf(stderr, "%3d %8d %12.8f %12.8f %12.8f\n",
2685  i, index, p.f_normal_x[i], p.f_nbond_x[i], p.f_slow_x[i]);
2686  }
2687 #else
2688  Vector *f_normal = new Vector[n];
2689  Vector *f_nbond = new Vector[n];
2690  Vector *f_slow = new Vector[n];
2691  for (int i=0; i < n; i++) {
2692  f_normal[i].x = p.f_normal_x[i];
2693  f_normal[i].y = p.f_normal_y[i];
2694  f_normal[i].z = p.f_normal_z[i];
2695  f_nbond[i].x = p.f_nbond_x[i];
2696  f_nbond[i].y = p.f_nbond_y[i];
2697  f_nbond[i].z = p.f_nbond_z[i];
2698  f_slow[i].x = p.f_slow_x[i];
2699  f_slow[i].y = p.f_slow_y[i];
2700  f_slow[i].z = p.f_slow_z[i];
2701  }
2702  TestArray_write<double>(
2703  "f_normal_good.bin", "f_normal good", (double*)f_normal, 3*n);
2704  TestArray_write<double>(
2705  "f_nbond_good.bin", "f_nbond good", (double*)f_nbond, 3*n);
2706  TestArray_write<double>(
2707  "f_slow_good.bin", "f_slow good", (double*)f_slow, 3*n);
2708  delete [] f_normal;
2709  delete [] f_nbond;
2710  delete [] f_slow;
2711 #endif
2712  }
2713  }
2714 #endif
2715 
2716  // Do collections if any checks below are "on."
2717  // We add because we can't short-circuit.
2718  TIMER_START(t, SUBMITCOLLECT);
2719  if (isCollection) {
2720  submitCollections_SOA(step);
2721  }
2722  TIMER_STOP(t, SUBMITCOLLECT);
2723 
2724  NAMD_EVENT_STOP(eon, NamdProfileEvent::INTEGRATE_SOA_3); // integrate_SOA 3
2725 
2726  rebalanceLoad(step);
2727  }
2728 
// Mirror the SOA results back into the array-of-structures atom data.
2729  patch->copy_updates_to_AOS();
2730 
2731  TIMER_DONE(t);
2732  if (patch->patchID == SPECIAL_PATCH_ID) {
2733  printf("Timer collection reporting in microseconds for "
2734  "Patch %d\n", patch->patchID);
2735  TIMER_REPORT(t);
2736  }
2737 }
2738 
2739 
2740 // XXX inline it?
2741 // XXX does not handle fixed atoms
2742 // Each timestep: dt = scaling * (timestep / TIMEFACTOR);
// NOTE(review): the signature line (doc-line 2743, presumably
// "void Sequencer::addForceToMomentum_SOA(") was stripped from this
// listing; only the parameter list and body are visible.
//
// Velocity kick: for each force class up to maxForceNumber, adds
// f * recipMass * (scaling * dt_class) to the velocities.  The switch falls
// through from slow -> nbond -> normal, so selecting a slower class always
// also applies all faster classes.
2744  const double scaling,
2745  double dt_normal, // timestep Results::normal = 0
2746  double dt_nbond, // timestep Results::nbond = 1
2747  double dt_slow, // timestep Results::slow = 2
2748 #ifndef SOA_SIMPLIFY_PARAMS
2749  const double * __restrict recipMass,
2750  const double * __restrict f_normal_x, // force Results::normal = 0
2751  const double * __restrict f_normal_y,
2752  const double * __restrict f_normal_z,
2753  const double * __restrict f_nbond_x, // force Results::nbond = 1
2754  const double * __restrict f_nbond_y,
2755  const double * __restrict f_nbond_z,
2756  const double * __restrict f_slow_x, // force Results::slow = 2
2757  const double * __restrict f_slow_y,
2758  const double * __restrict f_slow_z,
2759  double * __restrict vel_x,
2760  double * __restrict vel_y,
2761  double * __restrict vel_z,
2762  int numAtoms,
2763 #endif
2764  int maxForceNumber
2765  ) {
2766  NAMD_EVENT_RANGE_2(patch->flags.event_on,
2767  NamdProfileEvent::ADD_FORCE_TO_MOMENTUM_SOA);
2768 
// With SOA_SIMPLIFY_PARAMS the array arguments are dropped from the
// signature and resolved locally from patch->patchDataSOA instead.
2769 #ifdef SOA_SIMPLIFY_PARAMS
2770  const double * __restrict recipMass = patch->patchDataSOA.recipMass;
2771  // force Results::normal = 0
2772  const double * __restrict f_normal_x = patch->patchDataSOA.f_normal_x;
2773  const double * __restrict f_normal_y = patch->patchDataSOA.f_normal_y;
2774  const double * __restrict f_normal_z = patch->patchDataSOA.f_normal_z;
2775  // force Results::nbond = 1
2776  const double * __restrict f_nbond_x = patch->patchDataSOA.f_nbond_x;
2777  const double * __restrict f_nbond_y = patch->patchDataSOA.f_nbond_y;
2778  const double * __restrict f_nbond_z = patch->patchDataSOA.f_nbond_z;
2779  // force Results::slow = 2
2780  const double * __restrict f_slow_x = patch->patchDataSOA.f_slow_x;
2781  const double * __restrict f_slow_y = patch->patchDataSOA.f_slow_y;
2782  const double * __restrict f_slow_z = patch->patchDataSOA.f_slow_z;
2783  double * __restrict vel_x = patch->patchDataSOA.vel_x;
2784  double * __restrict vel_y = patch->patchDataSOA.vel_y;
2785  double * __restrict vel_z = patch->patchDataSOA.vel_z;
2786  int numAtoms = patch->patchDataSOA.numAtoms;
2787 #endif
2788  //
2789  // We could combine each case into a single loop with breaks,
2790  // with all faster forces also summed, like addForceToMomentum3().
2791  //
2792  // Things to consider:
2793  // - Do we always use acceleration (f/m) instead of just plain force?
2794  // Then we could instead buffer accel_slow, accel_nbond, etc.
2795  // - We will always need one multiply, since each dt includes
2796  // also a scaling factor.
2797  //
2798 
// Dead debug block (disabled with #if 0): dumps one patch's positions,
// velocities, and forces to stderr.
2799 #if 0
2800  if(this->patch->getPatchID() == 538){
2801  // fprintf(stderr, "Old Positions %lf %lf %lf\n", patch->patchDataSOA.pos_x[43], patch->patchDataSOA.pos_y[43], patch->patchDataSOA.pos_z[43]);
2802  // fprintf(stderr, "Old Velocities %lf %lf %lf\n", vel_x[43], vel_y[43], vel_z[ 43]);
2803  // fprintf(stderr, "Adding forces %lf %lf %lf %lf %lf %lf %lf %lf %lf\n",
2804  // f_slow_x[43], f_slow_y[43], f_slow_z[43],
2805  // f_nbond_x[43], f_nbond_y[43], f_nbond_z[43],
2806  // f_normal_x[43], f_normal_y[43], f_normal_z[43]);
2807  fprintf(stderr, "Old Positions %lf %lf %lf\n", patch->patchDataSOA.pos_x[0], patch->patchDataSOA.pos_y[0], patch->patchDataSOA.pos_z[0]);
2808  fprintf(stderr, "Old Velocities %lf %lf %lf\n", vel_x[0], vel_y[0], vel_z[ 0]);
2809  fprintf(stderr, "Adding forces %lf %lf %lf %lf %lf %lf %lf %lf %lf\n",
2810  f_slow_x[43], f_slow_y[43], f_slow_z[43],
2811  f_nbond_x[43], f_nbond_y[43], f_nbond_z[43],
2812  f_normal_x[43], f_normal_y[43], f_normal_z[43]);
2813  }
2814 #endif
// Each dt is scaled once before its loop (dt already includes the
// TIMEFACTOR conversion; scaling supplies the +/- fractional kick).
2815  switch (maxForceNumber) {
2816  case Results::slow:
2817  dt_slow *= scaling;
2818  for (int i=0; i < numAtoms; i++) {
2819  vel_x[i] += f_slow_x[i] * recipMass[i] * dt_slow;
2820  vel_y[i] += f_slow_y[i] * recipMass[i] * dt_slow;
2821  vel_z[i] += f_slow_z[i] * recipMass[i] * dt_slow;
2822  }
2823  // fall through because we will always have the "faster" forces
2824  case Results::nbond:
2825  dt_nbond *= scaling;
2826  for (int i=0; i < numAtoms; i++) {
2827  vel_x[i] += f_nbond_x[i] * recipMass[i] * dt_nbond;
2828  vel_y[i] += f_nbond_y[i] * recipMass[i] * dt_nbond;
2829  vel_z[i] += f_nbond_z[i] * recipMass[i] * dt_nbond;
2830  }
2831  // fall through because we will always have the "faster" forces
2832  case Results::normal:
2833  dt_normal *= scaling;
2834  for (int i=0; i < numAtoms; i++) {
2835  vel_x[i] += f_normal_x[i] * recipMass[i] * dt_normal;
2836  vel_y[i] += f_normal_y[i] * recipMass[i] * dt_normal;
2837  vel_z[i] += f_normal_z[i] * recipMass[i] * dt_normal;
2838  }
2839  }
2840 }
2841 
2842 
2843 // XXX inline it?
2844 // XXX does not handle fixed atoms
2845 // Timestep: dt = scaling * (timestep / TIMEFACTOR);
2847  const double dt
2848 #ifndef SOA_SIMPLIFY_PARAMS
2849  ,
2850  const double * __restrict vel_x,
2851  const double * __restrict vel_y,
2852  const double * __restrict vel_z,
2853  double * __restrict pos_x,
2854  double * __restrict pos_y,
2855  double * __restrict pos_z,
2856  int numAtoms
2857 #endif
2858  ) {
2859  NAMD_EVENT_RANGE_2(patch->flags.event_on,
2860  NamdProfileEvent::ADD_VELOCITY_TO_POSITION_SOA);
2861 #ifdef SOA_SIMPLIFY_PARAMS
2862  const double * __restrict vel_x = patch->patchDataSOA.vel_x;
2863  const double * __restrict vel_y = patch->patchDataSOA.vel_y;
2864  const double * __restrict vel_z = patch->patchDataSOA.vel_z;
2865  double * __restrict pos_x = patch->patchDataSOA.pos_x;
2866  double * __restrict pos_y = patch->patchDataSOA.pos_y;
2867  double * __restrict pos_z = patch->patchDataSOA.pos_z;
2868  int numAtoms = patch->patchDataSOA.numAtoms;
2869 #endif
2870  for (int i=0; i < numAtoms; i++) {
2871  pos_x[i] += vel_x[i] * dt;
2872  pos_y[i] += vel_y[i] * dt;
2873  pos_z[i] += vel_z[i] * dt;
2874  }
2875 #if 0
2876  if(this->patch->getPatchID() == 538){
2877  fprintf(stderr, "New Positions %lf %lf %lf\n", pos_x[43], pos_y[43], pos_z[43]);
2878  fprintf(stderr, "New Velocities %lf %lf %lf\n", vel_x[43], vel_y[43], vel_z[43]);
2879  }
2880 #endif
2881 
2882 }
2883 
2884 
2886 #ifndef SOA_SIMPLIFY_PARAMS
2887  const int * __restrict hydrogenGroupSize,
2888  const float * __restrict mass,
2889  const double * __restrict vel_x,
2890  const double * __restrict vel_y,
2891  const double * __restrict vel_z,
2892  int numAtoms
2893 #endif
2894  ) {
2895  NAMD_EVENT_RANGE_2(patch->flags.event_on,
2896  NamdProfileEvent::SUBMIT_HALFSTEP_SOA);
2897 #ifdef SOA_SIMPLIFY_PARAMS
2898  const int * __restrict hydrogenGroupSize = patch->patchDataSOA.hydrogenGroupSize;
2899  const float * __restrict mass = patch->patchDataSOA.mass;
2900  const double * __restrict vel_x = patch->patchDataSOA.vel_x;
2901  const double * __restrict vel_y = patch->patchDataSOA.vel_y;
2902  const double * __restrict vel_z = patch->patchDataSOA.vel_z;
2903  int numAtoms = patch->patchDataSOA.numAtoms;
2904 #endif
2905  if ( 1 /* doKineticEnergy || patch->flags.doVirial */ ) {
2906  BigReal kineticEnergy = 0;
2907  Tensor virial;
2908  for (int i=0; i < numAtoms; i++) {
2909  // scalar kineticEnergy += mass[i] * vel[i]^2
2910  kineticEnergy += mass[i] *
2911  (vel_x[i]*vel_x[i] + vel_y[i]*vel_y[i] + vel_z[i]*vel_z[i]);
2912  // tensor virial += mass[i] * outer_product(vel[i], vel[i])
2913  virial.xx += mass[i] * vel_x[i] * vel_x[i];
2914  virial.xy += mass[i] * vel_x[i] * vel_y[i];
2915  virial.xz += mass[i] * vel_x[i] * vel_z[i];
2916  virial.yx += mass[i] * vel_y[i] * vel_x[i];
2917  virial.yy += mass[i] * vel_y[i] * vel_y[i];
2918  virial.yz += mass[i] * vel_y[i] * vel_z[i];
2919  virial.zx += mass[i] * vel_z[i] * vel_x[i];
2920  virial.zy += mass[i] * vel_z[i] * vel_y[i];
2921  virial.zz += mass[i] * vel_z[i] * vel_z[i];
2922  }
2923  kineticEnergy *= 0.5 * 0.5;
2924  virial *= 0.5;
2925 
2927  ADD_TENSOR_OBJECT(reduction,REDUCTION_VIRIAL_NORMAL,virial);
2928  }
2929 
2930  if ( 1 /* doKineticEnergy || patch->flags.doVirial */ ) {
2931  BigReal intKineticEnergy = 0;
2932  Tensor intVirialNormal;
2933  int hgs;
2934  for (int i=0; i < numAtoms; i += hgs) {
2935  // find velocity of center-of-mass of hydrogen group
2936  // calculate mass-weighted velocity
2937  hgs = hydrogenGroupSize[i];
2938  BigReal m_cm = 0;
2939  BigReal v_cm_x = 0;
2940  BigReal v_cm_y = 0;
2941  BigReal v_cm_z = 0;
2942  for (int j = i; j < (i+hgs); j++) {
2943  m_cm += mass[j];
2944  v_cm_x += mass[j] * vel_x[j];
2945  v_cm_y += mass[j] * vel_y[j];
2946  v_cm_z += mass[j] * vel_z[j];
2947  }
2948  BigReal recip_m_cm = 1.0 / m_cm;
2949  v_cm_x *= recip_m_cm;
2950  v_cm_y *= recip_m_cm;
2951  v_cm_z *= recip_m_cm;
2952  // sum virial contributions wrt vel center-of-mass
2953  for (int j = i; j < (i+hgs); j++) {
2954  BigReal dv_x = vel_x[j] - v_cm_x;
2955  BigReal dv_y = vel_y[j] - v_cm_y;
2956  BigReal dv_z = vel_z[j] - v_cm_z;
2957  // scalar intKineticEnergy += mass[j] * dot_product(vel[j], dv)
2958  intKineticEnergy += mass[j] *
2959  (vel_x[j] * dv_x + vel_y[j] * dv_y + vel_z[j] * dv_z);
2960  // tensor intVirialNormal += mass[j] * outer_product(vel[j], dv)
2961  intVirialNormal.xx += mass[j] * vel_x[j] * dv_x;
2962  intVirialNormal.xy += mass[j] * vel_x[j] * dv_y;
2963  intVirialNormal.xz += mass[j] * vel_x[j] * dv_z;
2964  intVirialNormal.yx += mass[j] * vel_y[j] * dv_x;
2965  intVirialNormal.yy += mass[j] * vel_y[j] * dv_y;
2966  intVirialNormal.yz += mass[j] * vel_y[j] * dv_z;
2967  intVirialNormal.zx += mass[j] * vel_z[j] * dv_x;
2968  intVirialNormal.zy += mass[j] * vel_z[j] * dv_y;
2969  intVirialNormal.zz += mass[j] * vel_z[j] * dv_z;
2970  }
2971  }
2972  intKineticEnergy *= 0.5 * 0.5;
2973  intVirialNormal *= 0.5;
2975  += intKineticEnergy;
2976  ADD_TENSOR_OBJECT(reduction, REDUCTION_INT_VIRIAL_NORMAL,
2977  intVirialNormal);
2978  }
2979 }
2980 
2981 
2982 //
2983 // XXX
2984 //
2986 #ifndef SOA_SIMPLIFY_PARAMS
2987  const int * __restrict hydrogenGroupSize,
2988  const float * __restrict mass,
2989  const double * __restrict pos_x,
2990  const double * __restrict pos_y,
2991  const double * __restrict pos_z,
2992  const double * __restrict vel_x,
2993  const double * __restrict vel_y,
2994  const double * __restrict vel_z,
2995  const double * __restrict f_normal_x,
2996  const double * __restrict f_normal_y,
2997  const double * __restrict f_normal_z,
2998  const double * __restrict f_nbond_x,
2999  const double * __restrict f_nbond_y,
3000  const double * __restrict f_nbond_z,
3001  const double * __restrict f_slow_x,
3002  const double * __restrict f_slow_y,
3003  const double * __restrict f_slow_z,
3004  int numAtoms
3005 #endif
3006  ) {
3007  NAMD_EVENT_RANGE_2(patch->flags.event_on,
3008  NamdProfileEvent::SUBMIT_REDUCTIONS_SOA);
3009 #ifdef SOA_SIMPLIFY_PARAMS
3010  const int * __restrict hydrogenGroupSize = patch->patchDataSOA.hydrogenGroupSize;
3011  const float * __restrict mass = patch->patchDataSOA.mass;
3012  const double * __restrict pos_x = patch->patchDataSOA.pos_x;
3013  const double * __restrict pos_y = patch->patchDataSOA.pos_y;
3014  const double * __restrict pos_z = patch->patchDataSOA.pos_z;
3015  const double * __restrict vel_x = patch->patchDataSOA.vel_x;
3016  const double * __restrict vel_y = patch->patchDataSOA.vel_y;
3017  const double * __restrict vel_z = patch->patchDataSOA.vel_z;
3018  const double * __restrict f_normal_x = patch->patchDataSOA.f_normal_x;
3019  const double * __restrict f_normal_y = patch->patchDataSOA.f_normal_y;
3020  const double * __restrict f_normal_z = patch->patchDataSOA.f_normal_z;
3021  const double * __restrict f_nbond_x = patch->patchDataSOA.f_nbond_x;
3022  const double * __restrict f_nbond_y = patch->patchDataSOA.f_nbond_y;
3023  const double * __restrict f_nbond_z = patch->patchDataSOA.f_nbond_z;
3024  const double * __restrict f_slow_x = patch->patchDataSOA.f_slow_x;
3025  const double * __restrict f_slow_y = patch->patchDataSOA.f_slow_y;
3026  const double * __restrict f_slow_z = patch->patchDataSOA.f_slow_z;
3027  int numAtoms = patch->patchDataSOA.numAtoms;
3028 #endif
3029 
3030  reduction->item(REDUCTION_ATOM_CHECKSUM) += numAtoms;
3032 
3033  if ( 1 /* doKineticEnergy || doMomenta || patch->flags.doVirial */ ) {
3034  BigReal kineticEnergy = 0;
3035  BigReal momentum_x = 0;
3036  BigReal momentum_y = 0;
3037  BigReal momentum_z = 0;
3038  BigReal angularMomentum_x = 0;
3039  BigReal angularMomentum_y = 0;
3040  BigReal angularMomentum_z = 0;
3041  BigReal origin_x = patch->lattice.origin().x;
3042  BigReal origin_y = patch->lattice.origin().y;
3043  BigReal origin_z = patch->lattice.origin().z;
3044 
3045  // XXX pairInteraction
3046 
3047  for (int i=0; i < numAtoms; i++) {
3048 
3049  // scalar kineticEnergy += mass[i] * dot_product(vel[i], vel[i])
3050  kineticEnergy += mass[i] *
3051  (vel_x[i]*vel_x[i] + vel_y[i]*vel_y[i] + vel_z[i]*vel_z[i]);
3052 
3053  // vector momentum += mass[i] * vel[i]
3054  momentum_x += mass[i] * vel_x[i];
3055  momentum_y += mass[i] * vel_y[i];
3056  momentum_z += mass[i] * vel_z[i];
3057 
3058  // vector dpos = pos[i] - origin
3059  BigReal dpos_x = pos_x[i] - origin_x;
3060  BigReal dpos_y = pos_y[i] - origin_y;
3061  BigReal dpos_z = pos_z[i] - origin_z;
3062 
3063  // vector angularMomentum += mass[i] * cross_product(dpos, vel[i])
3064  angularMomentum_x += mass[i] * (dpos_y*vel_z[i] - dpos_z*vel_y[i]);
3065  angularMomentum_y += mass[i] * (dpos_z*vel_x[i] - dpos_x*vel_z[i]);
3066  angularMomentum_z += mass[i] * (dpos_x*vel_y[i] - dpos_y*vel_x[i]);
3067  }
3068 
3069  // XXX missing Drude
3070 
3071  kineticEnergy *= 0.5;
3072  Vector momentum(momentum_x, momentum_y, momentum_z);
3073  Vector angularMomentum(angularMomentum_x, angularMomentum_y,
3074  angularMomentum_z);
3075 
3077  ADD_VECTOR_OBJECT(reduction,REDUCTION_MOMENTUM,momentum);
3078  ADD_VECTOR_OBJECT(reduction,REDUCTION_ANGULAR_MOMENTUM,angularMomentum);
3079  }
3080  // For non-Multigrator doKineticEnergy = 1 always
3081  if ( 1 /* doKineticEnergy || patch->flags.doVirial */ ) {
3082  BigReal intKineticEnergy = 0;
3083  Tensor intVirialNormal;
3084  Tensor intVirialNbond;
3085  Tensor intVirialSlow;
3086 
3087  int hgs = 1; // hydrogen group size
3088  for (int i=0; i < numAtoms; i += hgs) {
3089  hgs = hydrogenGroupSize[i];
3090  int j;
3091  BigReal m_cm = 0;
3092  BigReal r_cm_x = 0;
3093  BigReal r_cm_y = 0;
3094  BigReal r_cm_z = 0;
3095  BigReal v_cm_x = 0;
3096  BigReal v_cm_y = 0;
3097  BigReal v_cm_z = 0;
3098  for ( j = i; j < (i+hgs); ++j ) {
3099  m_cm += mass[j];
3100  r_cm_x += mass[j] * pos_x[j];
3101  r_cm_y += mass[j] * pos_y[j];
3102  r_cm_z += mass[j] * pos_z[j];
3103  v_cm_x += mass[j] * vel_x[j];
3104  v_cm_y += mass[j] * vel_y[j];
3105  v_cm_z += mass[j] * vel_z[j];
3106  }
3107  BigReal inv_m_cm = namd_reciprocal(m_cm);
3108  r_cm_x *= inv_m_cm;
3109  r_cm_y *= inv_m_cm;
3110  r_cm_z *= inv_m_cm;
3111  v_cm_x *= inv_m_cm;
3112  v_cm_y *= inv_m_cm;
3113  v_cm_z *= inv_m_cm;
3114 
3115  // XXX removed pairInteraction
3116  for ( j = i; j < (i+hgs); ++j ) {
3117  // XXX removed fixed atoms
3118 
3119  // vector vel[j] used twice below
3120  BigReal v_x = vel_x[j];
3121  BigReal v_y = vel_y[j];
3122  BigReal v_z = vel_z[j];
3123 
3124  // vector dv = vel[j] - v_cm
3125  BigReal dv_x = v_x - v_cm_x;
3126  BigReal dv_y = v_y - v_cm_y;
3127  BigReal dv_z = v_z - v_cm_z;
3128 
3129  // scalar intKineticEnergy += mass[j] * dot_product(v, dv)
3130  intKineticEnergy += mass[j] *
3131  (v_x * dv_x + v_y * dv_y + v_z * dv_z);
3132 
3133  // vector dr = pos[j] - r_cm
3134  BigReal dr_x = pos_x[j] - r_cm_x;
3135  BigReal dr_y = pos_y[j] - r_cm_y;
3136  BigReal dr_z = pos_z[j] - r_cm_z;
3137 
3138  // tensor intVirialNormal += outer_product(f_normal[j], dr)
3139  intVirialNormal.xx += f_normal_x[j] * dr_x;
3140  intVirialNormal.xy += f_normal_x[j] * dr_y;
3141  intVirialNormal.xz += f_normal_x[j] * dr_z;
3142  intVirialNormal.yx += f_normal_y[j] * dr_x;
3143  intVirialNormal.yy += f_normal_y[j] * dr_y;
3144  intVirialNormal.yz += f_normal_y[j] * dr_z;
3145  intVirialNormal.zx += f_normal_z[j] * dr_x;
3146  intVirialNormal.zy += f_normal_z[j] * dr_y;
3147  intVirialNormal.zz += f_normal_z[j] * dr_z;
3148 
3149  // tensor intVirialNbond += outer_product(f_nbond[j], dr)
3150  intVirialNbond.xx += f_nbond_x[j] * dr_x;
3151  intVirialNbond.xy += f_nbond_x[j] * dr_y;
3152  intVirialNbond.xz += f_nbond_x[j] * dr_z;
3153  intVirialNbond.yx += f_nbond_y[j] * dr_x;
3154  intVirialNbond.yy += f_nbond_y[j] * dr_y;
3155  intVirialNbond.yz += f_nbond_y[j] * dr_z;
3156  intVirialNbond.zx += f_nbond_z[j] * dr_x;
3157  intVirialNbond.zy += f_nbond_z[j] * dr_y;
3158  intVirialNbond.zz += f_nbond_z[j] * dr_z;
3159 
3160  // tensor intVirialSlow += outer_product(f_slow[j], dr)
3161  intVirialSlow.xx += f_slow_x[j] * dr_x;
3162  intVirialSlow.xy += f_slow_x[j] * dr_y;
3163  intVirialSlow.xz += f_slow_x[j] * dr_z;
3164  intVirialSlow.yx += f_slow_y[j] * dr_x;
3165  intVirialSlow.yy += f_slow_y[j] * dr_y;
3166  intVirialSlow.yz += f_slow_y[j] * dr_z;
3167  intVirialSlow.zx += f_slow_z[j] * dr_x;
3168  intVirialSlow.zy += f_slow_z[j] * dr_y;
3169  intVirialSlow.zz += f_slow_z[j] * dr_z;
3170  }
3171  }
3172 
3173  intKineticEnergy *= 0.5;
3174 
3176  ADD_TENSOR_OBJECT(reduction,REDUCTION_INT_VIRIAL_NORMAL,intVirialNormal);
3177  ADD_TENSOR_OBJECT(reduction,REDUCTION_INT_VIRIAL_NBOND,intVirialNbond);
3178  ADD_TENSOR_OBJECT(reduction,REDUCTION_INT_VIRIAL_SLOW,intVirialSlow);
3179  }
3180  // XXX removed pressure profile
3181 
3182  // XXX removed fixed atoms
3183 
3184  reduction->submit();
3185 
3186  // XXX removed pressure profile reduction
3187 }
3188 
3189 
3190 void Sequencer::submitCollections_SOA(int step, int zeroVel /* = 0 */)
3191 {
3192  //
3193  // Copy updates of SOA back into AOS for collections.
3194  //
3195  // XXX Could update positions and velocities separately.
3196  //
3197  NAMD_EVENT_RANGE_2(patch->flags.event_on,
3198  NamdProfileEvent::SUBMIT_COLLECTIONS_SOA);
3199  //
3200  // XXX Poor implementation here!
3201  // The selector functions called below in Output.C are
3202  // doing several tests and in an average use case calculating
3203  // at least two mod functions.
3204  //
3205  // However, most steps are NOT output steps!
3206  //
 // Ask the Output module which data this step must emit.
 // coordinateNeeded() also reports which DCD file index the
 // positions belong to (multi-file trajectory output).
3207  int is_pos_needed;
3208  int dcdIndex;
3209  std::tie(is_pos_needed, dcdIndex)= Output::coordinateNeeded(step);
3210  int is_vel_needed = Output::velocityNeeded(step);
3211  int is_f_needed = Output::forceNeeded(step);
 // SOA arrays are authoritative during integration; sync them back
 // into the AOS FullAtom records only when output actually needs them.
3212  if (!simParams->useDeviceMigration) { // This is already done for GPU migration
3213  if ( is_pos_needed || is_vel_needed ) {
3214  patch->copy_updates_to_AOS();
3215  }
3216  }
3217  if (is_f_needed) {
3223  patch->copy_forces_to_AOS();
3224  }
3225  if ( is_pos_needed ) {
3226  collection->submitPositions(step,patch->atom,patch->lattice,is_pos_needed,dcdIndex);
3227  }
3228  if ( is_vel_needed ) {
 // zeroVel selects the convention for half-step velocity output.
3229  collection->submitVelocities(step,zeroVel,patch->atom,is_vel_needed);
3230  }
3231  if ( is_f_needed ) {
 // Clamp to Results::slow: force categories beyond "slow" are not
 // part of the collected force output.
3232  int maxForceUsed = patch->flags.maxForceUsed;
3233  if ( maxForceUsed > Results::slow ) maxForceUsed = Results::slow;
3234  collection->submitForces(step,patch->atom,maxForceUsed,patch->f,is_f_needed);
3235  }
3236 }
3237 
3238 
3240  const double dt,
3241  const double maxvel2
3242 #ifndef SOA_SIMPLIFY_PARAMS
3243  ,
3244  const double * __restrict vel_x,
3245  const double * __restrict vel_y,
3246  const double * __restrict vel_z,
3247  int numAtoms
3248 #endif
3249  ) {
3250  NAMD_EVENT_RANGE_2(patch->flags.event_on, NamdProfileEvent::MAXIMUM_MOVE_SOA);
3251 #ifdef SOA_SIMPLIFY_PARAMS
3252  const double * __restrict vel_x = patch->patchDataSOA.vel_x;
3253  const double * __restrict vel_y = patch->patchDataSOA.vel_y;
3254  const double * __restrict vel_z = patch->patchDataSOA.vel_z;
3255  int numAtoms = patch->patchDataSOA.numAtoms;
3256 #endif
3257 
3258  // XXX missing maximum move
3259 
3260  // Loop vectorizes when replacing logical OR with summing.
3261  int killme = 0;
3262  for (int i=0; i < numAtoms; i++) {
3263  BigReal vel2 =
3264  vel_x[i] * vel_x[i] + vel_y[i] * vel_y[i] + vel_z[i] * vel_z[i];
3265  killme = killme + ( vel2 > maxvel2 );
3266  }
3267  if (killme) {
3268  // Found at least one atom that is moving too fast.
3269  // Terminating, so loop performance below doesn't matter.
3270  // Loop does not vectorize.
3271  killme = 0;
3272  for (int i=0; i < numAtoms; i++) {
3273  BigReal vel2 =
3274  vel_x[i] * vel_x[i] + vel_y[i] * vel_y[i] + vel_z[i] * vel_z[i];
3275  if (vel2 > maxvel2) {
3276  const FullAtom *a = patch->atom.begin();
3277  const Vector vel(vel_x[i], vel_y[i], vel_z[i]);
3278  const BigReal maxvel = sqrt(maxvel2);
3279  ++killme;
3280  iout << iERROR << "Atom " << (a[i].id + 1) << " velocity is "
3281  << ( PDBVELFACTOR * vel ) << " (limit is "
3282  << ( PDBVELFACTOR * maxvel ) << ", atom "
3283  << i << " of " << numAtoms << " on patch "
3284  << patch->patchID << " pe " << CkMyPe() << ")\n" << endi;
3285  }
3286  }
3287  iout << iERROR <<
3288  "Atoms moving too fast; simulation has become unstable ("
3289  << killme << " atoms on patch " << patch->patchID
3290  << " pe " << CkMyPe() << ").\n" << endi;
3292  terminate();
3293  }
3294 }
3295 
3296 
3298  BigReal timestep
3299 #ifndef SOA_SIMPLIFY_PARAMS
3300  ,
3301  const float * __restrict langevinParam,
3302  double * __restrict vel_x,
3303  double * __restrict vel_y,
3304  double * __restrict vel_z,
3305  int numAtoms
3306 #endif
3307  ) {
3308  NAMD_EVENT_RANGE_2(patch->flags.event_on,
3309  NamdProfileEvent::LANGEVIN_VELOCITIES_BBK1_SOA);
3310 #ifdef SOA_SIMPLIFY_PARAMS
3311  const float * __restrict langevinParam = patch->patchDataSOA.langevinParam;
3312  double * __restrict vel_x = patch->patchDataSOA.vel_x;
3313  double * __restrict vel_y = patch->patchDataSOA.vel_y;
3314  double * __restrict vel_z = patch->patchDataSOA.vel_z;
3315  int numAtoms = patch->patchDataSOA.numAtoms;
3316 #endif
3317  if ( simParams->langevinOn /* && !simParams->langevin_useBAOAB */ )
3318  {
3319  // scale by TIMEFACTOR to convert to fs and then by 0.001 to ps
3320  // multiply by the Langevin damping coefficient, units 1/ps
3321  // XXX we could instead store time-scaled Langevin parameters
3322  BigReal dt = timestep * (0.001 * TIMEFACTOR);
3323 
3324  // XXX missing Drude
3325 
3326  //
3327  // The conditional inside loop prevents vectorization and doesn't
3328  // avoid much work since addition and multiplication are cheap.
3329  //
3330  for (int i=0; i < numAtoms; i++) {
3331  BigReal dt_gamma = dt * langevinParam[i];
3332  //if ( ! dt_gamma ) continue;
3333 
3334  BigReal scaling = 1. - 0.5 * dt_gamma;
3335  vel_x[i] *= scaling;
3336  vel_y[i] *= scaling;
3337  vel_z[i] *= scaling;
3338  }
3339  } // end if langevinOn
3340 }
3341 
3342 
3344  BigReal timestep
3345 #ifndef SOA_SIMPLIFY_PARAMS
3346  ,
3347  const float * __restrict langevinParam,
3348  const float * __restrict langScalVelBBK2,
3349  const float * __restrict langScalRandBBK2,
3350  float * __restrict gaussrand_x,
3351  float * __restrict gaussrand_y,
3352  float * __restrict gaussrand_z,
3353  double * __restrict vel_x,
3354  double * __restrict vel_y,
3355  double * __restrict vel_z,
3356  int numAtoms
3357 #endif
3358  )
3359 {
3360  NAMD_EVENT_RANGE_2(patch->flags.event_on,
3361  NamdProfileEvent::LANGEVIN_VELOCITIES_BBK2_SOA);
3362 #ifdef SOA_SIMPLIFY_PARAMS
3363  const float * __restrict langevinParam = patch->patchDataSOA.langevinParam;
3364  const float * __restrict langScalVelBBK2 = patch->patchDataSOA.langScalVelBBK2;
3365  const float * __restrict langScalRandBBK2 = patch->patchDataSOA.langScalRandBBK2;
3366  float * __restrict gaussrand_x = patch->patchDataSOA.gaussrand_x;
3367  float * __restrict gaussrand_y = patch->patchDataSOA.gaussrand_y;
3368  float * __restrict gaussrand_z = patch->patchDataSOA.gaussrand_z;
3369  double * __restrict vel_x = patch->patchDataSOA.vel_x;
3370  double * __restrict vel_y = patch->patchDataSOA.vel_y;
3371  double * __restrict vel_z = patch->patchDataSOA.vel_z;
3372  int numAtoms = patch->patchDataSOA.numAtoms;
3373 #endif
3374  if ( simParams->langevinOn /* && !simParams->langevin_useBAOAB */ )
3375  {
3376  // XXX missing Drude
3377 
3378  // Scale by TIMEFACTOR to convert to fs and then by 0.001 to ps
3379  // multiply by the Langevin damping coefficient, units 1/ps.
3380  // XXX we could instead store time-scaled Langevin parameters
3381  BigReal dt = timestep * (0.001 * TIMEFACTOR);
3382  // Buffer the Gaussian random numbers
3384  // Must re-satisfy constraints if Langevin gammas differ.
3385  // (conserve momentum?)
3386  TIMER_START(patch->timerSet, RATTLE1);
3387  rattle1_SOA(timestep, 1);
3388  TIMER_STOP(patch->timerSet, RATTLE1);
3389  //
3390  // We don't need random numbers for atoms such that gamma=0.
3391  // If gammas differ, the likely case is that we aren't applying
3392  // Langevin damping to hydrogen, making those langevinParam=0,
3393  // in which case we need only numAtoms/3 random vectors.
3394  //
3395  // XXX can refine code below, count in advance how many
3396  // random numbers we need to use Random array filling routine
3397  //
3398  // XXX Loop does not vectorize!
3399  for (int i=0; i < numAtoms; i++) {
3400  Vector rg; // = 0
3401  if (langevinParam[i] != 0) rg = random->gaussian_vector();
3402  gaussrand_x[i] = float(rg.x);
3403  gaussrand_y[i] = float(rg.y);
3404  gaussrand_z[i] = float(rg.z);
3405  }
3406  }
3407  else {
3408  // Need to completely fill random number arrays.
3409  random->gaussian_array_f(gaussrand_x, numAtoms);
3410  random->gaussian_array_f(gaussrand_y, numAtoms);
3411  random->gaussian_array_f(gaussrand_z, numAtoms);
3412  }
3413 
3414  // do the velocity updates
3415  for (int i=0; i < numAtoms; i++) {
3416  vel_x[i] += gaussrand_x[i] * langScalRandBBK2[i];
3417  vel_y[i] += gaussrand_y[i] * langScalRandBBK2[i];
3418  vel_z[i] += gaussrand_z[i] * langScalRandBBK2[i];
3419  vel_x[i] *= langScalVelBBK2[i];
3420  vel_y[i] *= langScalVelBBK2[i];
3421  vel_z[i] *= langScalVelBBK2[i];
3422  }
3423  } // end if langevinOn
3424 }
3425 
3427 #ifndef SOA_SIMPLIFY_PARAMS
3428  const int * __restrict hydrogenGroupSize,
3429  const float * __restrict mass,
3430  double * __restrict pos_x,
3431  double * __restrict pos_y,
3432  double * __restrict pos_z,
3433  int numAtoms,
3434 #endif
3435  int step)
3436 {
3437 #ifdef SOA_SIMPLIFY_PARAMS
3438  const int * __restrict hydrogenGroupSize = patch->patchDataSOA.hydrogenGroupSize;
3439  const float * __restrict mass = patch->patchDataSOA.mass;
3440  double * __restrict pos_x = patch->patchDataSOA.pos_x;
3441  double * __restrict pos_y = patch->patchDataSOA.pos_y;
3442  double * __restrict pos_z = patch->patchDataSOA.pos_z;
3443  int numAtoms = patch->patchDataSOA.numAtoms;
3444 #endif
3445 
3446  //
3447  // Loops below simplify if we lift out special cases of fixed atoms
3448  // and pressure excluded atoms and make them their own branch.
3449  //
3450 
3454  // Blocking receive for the updated lattice scaling factor.
3455  Tensor factor = broadcast->positionRescaleFactor.get(step);
3456  patch->lattice.rescale(factor);
3457  Vector origin = patch->lattice.origin();
3458 
3459  if ( simParams->useGroupPressure ) {
3460  int hgs;
3461  for (int i = 0; i < numAtoms; i += hgs) {
3462  int j;
3463  hgs = hydrogenGroupSize[i];
3464  // missing fixed atoms implementation
3465  BigReal m_cm = 0;
3466  BigReal r_cm_x = 0;
3467  BigReal r_cm_y = 0;
3468  BigReal r_cm_z = 0;
3469  // calculate the center of mass
3470  for ( j = i; j < (i+hgs); ++j ) {
3471  m_cm += mass[j];
3472  r_cm_x += mass[j] * pos_x[j];
3473  r_cm_y += mass[j] * pos_y[j];
3474  r_cm_z += mass[j] * pos_z[j];
3475  }
3476  BigReal inv_m_cm = namd_reciprocal(m_cm);
3477  r_cm_x *= inv_m_cm;
3478  r_cm_y *= inv_m_cm;
3479  r_cm_z *= inv_m_cm;
3480  // scale the center of mass with factor
3481  // shift to origin
3482  double tx = r_cm_x - origin.x;
3483  double ty = r_cm_y - origin.y;
3484  double tz = r_cm_z - origin.z;
3485  // apply transformation
3486  double new_r_cm_x = factor.xx*tx + factor.xy*ty + factor.xz*tz;
3487  double new_r_cm_y = factor.yx*tx + factor.yy*ty + factor.yz*tz;
3488  double new_r_cm_z = factor.zx*tx + factor.zy*ty + factor.zz*tz;
3489  // shift back
3490  new_r_cm_x += origin.x;
3491  new_r_cm_y += origin.y;
3492  new_r_cm_z += origin.z;
3493  // translation vector from old COM and new COM
3494  double delta_r_cm_x = new_r_cm_x - r_cm_x;
3495  double delta_r_cm_y = new_r_cm_y - r_cm_y;
3496  double delta_r_cm_z = new_r_cm_z - r_cm_z;
3497  // shift the hydrogen group with translation vector
3498  for (j = i; j < (i+hgs); ++j) {
3499  pos_x[j] += delta_r_cm_x;
3500  pos_y[j] += delta_r_cm_y;
3501  pos_z[j] += delta_r_cm_z;
3502  }
3503  }
3504  } else {
3505  for (int i = 0; i < numAtoms; ++i) {
3506  // missing fixed atoms implementation
3507  // scale the coordinates with factor
3508  // shift to origin
3509  double tx = pos_x[i] - origin.x;
3510  double ty = pos_y[i] - origin.y;
3511  double tz = pos_z[i] - origin.z;
3512  // apply transformation
3513  double ftx = factor.xx*tx + factor.xy*ty + factor.xz*tz;
3514  double fty = factor.yx*tx + factor.yy*ty + factor.yz*tz;
3515  double ftz = factor.zx*tx + factor.zy*ty + factor.zz*tz;
3516  // shift back
3517  pos_x[i] = ftx + origin.x;
3518  pos_y[i] = fty + origin.y;
3519  pos_z[i] = ftz + origin.z;
3520  }
3521  }
3522  }
3523 }
3524 
3526 #ifndef SOA_SIMPLIFY_PARAMS
3527  const int * __restrict hydrogenGroupSize,
3528  const float * __restrict mass,
3529  double * __restrict pos_x,
3530  double * __restrict pos_y,
3531  double * __restrict pos_z,
3532  double * __restrict vel_x,
3533  double * __restrict vel_y,
3534  double * __restrict vel_z,
3535  int numAtoms,
3536 #endif
3537  int step
3538  )
3539 {
3540 #ifdef SOA_SIMPLIFY_PARAMS
3541  const int * __restrict hydrogenGroupSize = patch->patchDataSOA.hydrogenGroupSize;
3542  const float * __restrict mass = patch->patchDataSOA.mass;
3543  double * __restrict pos_x = patch->patchDataSOA.pos_x;
3544  double * __restrict pos_y = patch->patchDataSOA.pos_y;
3545  double * __restrict pos_z = patch->patchDataSOA.pos_z;
3546  double * __restrict vel_x = patch->patchDataSOA.vel_x;
3547  double * __restrict vel_y = patch->patchDataSOA.vel_y;
3548  double * __restrict vel_z = patch->patchDataSOA.vel_z;
3549  int numAtoms = patch->patchDataSOA.numAtoms;
3550 #endif
3551 
3552  //
3553  // Loops below simplify if we lift out special cases of fixed atoms
3554  // and pressure excluded atoms and make them their own branch.
3555  //
3556 
3557  // Blocking receive for the updated lattice scaling factor.
3558 
3559  Tensor factor = broadcast->positionRescaleFactor.get(step);
3560 
3561  TIMER_START(patch->timerSet, PISTON);
3562  // JCP FIX THIS!!!
3563  double velFactor_x = namd_reciprocal(factor.xx);
3564  double velFactor_y = namd_reciprocal(factor.yy);
3565  double velFactor_z = namd_reciprocal(factor.zz);
3566  patch->lattice.rescale(factor);
3567  Vector origin = patch->lattice.origin();
3568  if ( simParams->useGroupPressure ) {
3569  int hgs;
3570  for (int i=0; i < numAtoms; i += hgs) {
3571  int j;
3572  hgs = hydrogenGroupSize[i];
3573  // missing fixed atoms
3574  BigReal m_cm = 0;
3575  BigReal r_cm_x = 0;
3576  BigReal r_cm_y = 0;
3577  BigReal r_cm_z = 0;
3578  BigReal v_cm_x = 0;
3579  BigReal v_cm_y = 0;
3580  BigReal v_cm_z = 0;
3581  for ( j = i; j < (i+hgs); ++j ) {
3582  m_cm += mass[j];
3583  r_cm_x += mass[j] * pos_x[j];
3584  r_cm_y += mass[j] * pos_y[j];
3585  r_cm_z += mass[j] * pos_z[j];
3586  v_cm_x += mass[j] * vel_x[j];
3587  v_cm_y += mass[j] * vel_y[j];
3588  v_cm_z += mass[j] * vel_z[j];
3589  }
3590  BigReal inv_m_cm = namd_reciprocal(m_cm);
3591  r_cm_x *= inv_m_cm;
3592  r_cm_y *= inv_m_cm;
3593  r_cm_z *= inv_m_cm;
3594 
3595  double tx = r_cm_x - origin.x;
3596  double ty = r_cm_y - origin.y;
3597  double tz = r_cm_z - origin.z;
3598  double new_r_cm_x = factor.xx*tx + factor.xy*ty + factor.xz*tz;
3599  double new_r_cm_y = factor.yx*tx + factor.yy*ty + factor.yz*tz;
3600  double new_r_cm_z = factor.zx*tx + factor.zy*ty + factor.zz*tz;
3601  new_r_cm_x += origin.x;
3602  new_r_cm_y += origin.y;
3603  new_r_cm_z += origin.z;
3604 
3605  double delta_r_cm_x = new_r_cm_x - r_cm_x;
3606  double delta_r_cm_y = new_r_cm_y - r_cm_y;
3607  double delta_r_cm_z = new_r_cm_z - r_cm_z;
3608  v_cm_x *= inv_m_cm;
3609  v_cm_y *= inv_m_cm;
3610  v_cm_z *= inv_m_cm;
3611  double delta_v_cm_x = ( velFactor_x - 1 ) * v_cm_x;
3612  double delta_v_cm_y = ( velFactor_y - 1 ) * v_cm_y;
3613  double delta_v_cm_z = ( velFactor_z - 1 ) * v_cm_z;
3614  for (j = i; j < (i+hgs); j++) {
3615  pos_x[j] += delta_r_cm_x;
3616  pos_y[j] += delta_r_cm_y;
3617  pos_z[j] += delta_r_cm_z;
3618  vel_x[j] += delta_v_cm_x;
3619  vel_y[j] += delta_v_cm_y;
3620  vel_z[j] += delta_v_cm_z;
3621  }
3622  // if (i < 10)
3623  // printf("cpu: %d, %f, %f, %f, %f, %f, %f\n", i,
3624  // pos_x[i], pos_y[i], pos_z[i],
3625  // vel_x[i], vel_y[i], vel_z[i]);
3626  }
3627  }
3628  else {
3629  for (int i=0; i < numAtoms; i++) {
3630  double tx = pos_x[i] - origin.x;
3631  double ty = pos_y[i] - origin.y;
3632  double tz = pos_z[i] - origin.z;
3633  double ftx = factor.xx*tx + factor.xy*ty + factor.xz*tz;
3634  double fty = factor.yx*tx + factor.yy*ty + factor.yz*tz;
3635  double ftz = factor.zx*tx + factor.zy*ty + factor.zz*tz;
3636  pos_x[i] = ftx + origin.x;
3637  pos_y[i] = fty + origin.y;
3638  pos_z[i] = ftz + origin.z;
3639  vel_x[i] *= velFactor_x;
3640  vel_y[i] *= velFactor_y;
3641  vel_z[i] *= velFactor_z;
3642  // if (i < 10)
3643  // printf("cpu: %d, %f, %f, %f, %f, %f, %f\n", i,
3644  // pos_x[i], pos_y[i], pos_z[i],
3645  // vel_x[i], vel_y[i], vel_z[i]);
3646  }
3647  }
3648  TIMER_STOP(patch->timerSet, PISTON);
3649  // exit(0);
3650 }
3651 
3652 
3653 // timestep scaled by 1/TIMEFACTOR
3654 void Sequencer::rattle1_SOA(BigReal timestep, int pressure)
3655 {
 // Enforce rigid-bond (RATTLE) constraints on the patch's SOA data.
 // timestep is already scaled by 1/TIMEFACTOR (see comment above).
 // When pressure is nonzero, the constraint virial is accumulated
 // and added to the normal-virial reduction.
3656  NAMD_EVENT_RANGE_2(patch->flags.event_on, NamdProfileEvent::RATTLE1_SOA);
3657  if ( simParams->rigidBonds != RIGID_NONE ) {
3658  Tensor virial;
 // vp == NULL tells patch->rattle1_SOA to skip virial accumulation.
3659  Tensor *vp = ( pressure ? &virial : 0 );
3660  // XXX pressureProfileReduction == NULL?
 // Nonzero return indicates a constraint iteration failed to converge.
3661  if ( patch->rattle1_SOA(timestep, vp, pressureProfileReduction) ) {
3662  iout << iERROR <<
3663  "Constraint failure; simulation has become unstable.\n" << endi;
3665  terminate();
3666  }
3667  ADD_TENSOR_OBJECT(reduction,REDUCTION_VIRIAL_NORMAL,virial);
3668  }
3669 }
3670 
// Drive one round of force computation for this patch on the SOA code path:
// validate/invalidate pairlists, publish positions, suspend this thread until
// all force deposit boxes close, then gather resulting forces into SOA arrays.
//   migration -- nonzero when atom migration happens this step (invalidates
//                pairlists).
//   pairlists -- nonzero when pairlists may be used/saved this step.
//   nstep     -- step index; used only by the disabled debug dump below.
// NOTE(review): this doxygen-derived listing is missing several source lines
// (3676, 3681, 3717, 3840), so some braces/conditions below appear unbalanced;
// consult upstream Sequencer.C before editing.
3671 void Sequencer::runComputeObjects_SOA(int migration, int pairlists, int nstep)
3672 {
3673  if ( migration ) pairlistsAreValid = 0;
3674 #if (defined(NAMD_CUDA) || defined(NAMD_HIP)) || defined(NAMD_MIC)
// GPU/MIC builds: expire pairlists once they exceed the configured age limit.
3675  if ( pairlistsAreValid &&
// NOTE(review): listing line 3676 (the middle term of this condition) is
// absent from this extraction.
3677  && ( pairlistsAge > pairlistsAgeLimit ) ) {
3678  pairlistsAreValid = 0;
3679  }
3680 #else
// NOTE(review): listing line 3681 (the opening `if (...) {` of this non-GPU
// branch) is absent from this extraction.
3682  pairlistsAreValid = 0;
3683  }
3684 #endif
3685  if ( ! simParams->usePairlists ) pairlists = 0;
3686  patch->flags.usePairlists = pairlists || pairlistsAreValid;
3687  patch->flags.savePairlists = pairlists && ! pairlistsAreValid;
3688 
// Optional debug dump: write positions+charges of one patch to a binary file
// for offline comparison against the GPU path (enabled via NTESTPID).
3689 #if defined(NTESTPID)
3690  if (1 && patch->patchID == NTESTPID) {
3691  int step = patch->flags.step;
3692  int numAtoms = patch->numAtoms;
3693  double *xyzq = new double[4*numAtoms];
3694  double *x = patch->patchDataSOA.pos_x;
3695  double *y = patch->patchDataSOA.pos_y;
3696  double *z = patch->patchDataSOA.pos_z;
3697  float *q = patch->patchDataSOA.charge;
3698  for (int i=0; i < numAtoms; i++) {
3699  xyzq[4*i ] = x[i];
3700  xyzq[4*i+1] = y[i];
3701  xyzq[4*i+2] = z[i];
3702  xyzq[4*i+3] = q[i];
3703  }
3704  char fname[128], remark[128];
3705  sprintf(fname, "xyzq_soa_pid%d_step%d.bin", NTESTPID, step);
3706  sprintf(remark, "SOA xyzq, patch %d, step %d", NTESTPID, step);
3707  TestArray_write<double>(fname, remark, xyzq, 4*numAtoms);
3708  delete[] xyzq;
3709  }
3710 #endif
3711  // Zero all SOA global forces before computing force
3712  patch->zero_global_forces_SOA();
3713  patch->positionsReady_SOA(migration); // updates flags.sequence
3714 
3715  int seq = patch->flags.sequence;
3716  int basePriority = ( (seq & 0xffff) << 15 )
// NOTE(review): listing line 3717 (the continuation of this initializer,
// presumably adding a patch-priority term) is absent from this extraction.
3718 
3719  // XXX missing GBIS
3720  priority = basePriority + COMPUTE_HOME_PRIORITY;
3721  //char prbuf[32];
3722  //sprintf(prbuf, "%s: %d", NamdProfileEventStr[NamdProfileEvent::SEQ_SUSPEND], patch->getPatchID());
3723  //NAMD_EVENT_START_EX(1, NamdProfileEvent::SEQ_SUSPEND, prbuf);
3724  suspend(); // until all deposit boxes close
3725  //NAMD_EVENT_STOP(1, NamdProfileEvent::SEQ_SUSPEND);
3726 
// After resuming, pull forces into SOA arrays.  With CUDASOAintegrate the GPU
// owns the forces except on migration steps, hence the guard.
3727 #ifdef NODEGROUP_FORCE_REGISTER
3728  if(!simParams->CUDASOAintegrate || migration){
3729  patch->copy_forces_to_SOA();
3730  }
3731 #else
3732  patch->copy_forces_to_SOA();
3733 #endif
3734 
// Optional debug dump of the three force classes (normal/nonbonded/slow).
3735 #if defined(NTESTPID)
3736  if (1 && patch->patchID == NTESTPID) {
3737  int step = patch->flags.step;
3738  int numAtoms = patch->numAtoms;
3739  char fname[128];
3740  char remark[128];
3741  double *fxyz = new double[3*numAtoms];
3742  double *fx = patch->patchDataSOA.f_normal_x;
3743  double *fy = patch->patchDataSOA.f_normal_y;
3744  double *fz = patch->patchDataSOA.f_normal_z;
3745  for (int i=0; i < numAtoms; i++) {
3746  fxyz[3*i ] = fx[i];
3747  fxyz[3*i+1] = fy[i];
3748  fxyz[3*i+2] = fz[i];
3749  }
3750  sprintf(fname, "fxyz_normal_soa_pid%d_step%d.bin", NTESTPID, step);
3751  sprintf(remark, "SOA fxyz normal, patch %d, step %d", NTESTPID, step);
3752  TestArray_write<double>(fname, remark, fxyz, 3*numAtoms);
3753  fx = patch->patchDataSOA.f_nbond_x;
3754  fy = patch->patchDataSOA.f_nbond_y;
3755  fz = patch->patchDataSOA.f_nbond_z;
3756  for (int i=0; i < numAtoms; i++) {
3757  fxyz[3*i ] = fx[i];
3758  fxyz[3*i+1] = fy[i];
3759  fxyz[3*i+2] = fz[i];
3760  }
3761  sprintf(fname, "fxyz_nbond_soa_pid%d_step%d.bin", NTESTPID, step);
3762  sprintf(remark, "SOA fxyz nonbonded, patch %d, step %d", NTESTPID, step);
3763  TestArray_write<double>(fname, remark, fxyz, 3*numAtoms);
3764  fx = patch->patchDataSOA.f_slow_x;
3765  fy = patch->patchDataSOA.f_slow_y;
3766  fz = patch->patchDataSOA.f_slow_z;
3767  for (int i=0; i < numAtoms; i++) {
3768  fxyz[3*i ] = fx[i];
3769  fxyz[3*i+1] = fy[i];
3770  fxyz[3*i+2] = fz[i];
3771  }
3772  sprintf(fname, "fxyz_slow_soa_pid%d_step%d.bin", NTESTPID, step);
3773  sprintf(remark, "SOA fxyz slow, patch %d, step %d", NTESTPID, step);
3774  TestArray_write<double>(fname, remark, fxyz, 3*numAtoms);
3775  delete[] fxyz;
3776  }
3777 #endif
3778 
// Disabled debug dump variant for patch 0 only.
3779 #if 0
3780  if (1 && patch->patchID == 0) {
3781  int numAtoms = patch->numAtoms;
3782  double *fxyz = new double[3*numAtoms];
3783  double *fx, *fy, *fz;
3784  char fname[64], remark[128];
3785  int step = patch->flags.step;
3786 
3787  fx = patch->patchDataSOA.f_slow_x;
3788  fy = patch->patchDataSOA.f_slow_y;
3789  fz = patch->patchDataSOA.f_slow_z;
3790  for (int i=0; i < numAtoms; i++) {
3791  fxyz[3*i ] = fx[i];
3792  fxyz[3*i+1] = fy[i];
3793  fxyz[3*i+2] = fz[i];
3794  }
3795  sprintf(fname, "fslow_soa_%d.bin", step);
3796  sprintf(remark, "SOA slow forces, step %d\n", step);
3797  TestArray_write<double>(fname, remark, fxyz, 3*numAtoms);
3798 
3799  fx = patch->patchDataSOA.f_nbond_x;
3800  fy = patch->patchDataSOA.f_nbond_y;
3801  fz = patch->patchDataSOA.f_nbond_z;
3802  for (int i=0; i < numAtoms; i++) {
3803  fxyz[3*i ] = fx[i];
3804  fxyz[3*i+1] = fy[i];
3805  fxyz[3*i+2] = fz[i];
3806  }
3807  sprintf(fname, "fnbond_soa_%d.bin", step);
3808  sprintf(remark, "SOA nonbonded forces, step %d\n", step);
3809  TestArray_write<double>(fname, remark, fxyz, 3*numAtoms);
3810 
3811  fx = patch->patchDataSOA.f_normal_x;
3812  fy = patch->patchDataSOA.f_normal_y;
3813  fz = patch->patchDataSOA.f_normal_z;
3814  for (int i=0; i < numAtoms; i++) {
3815  fxyz[3*i ] = fx[i];
3816  fxyz[3*i+1] = fy[i];
3817  fxyz[3*i+2] = fz[i];
3818  }
3819  sprintf(fname, "fnormal_soa_%d.bin", step);
3820  sprintf(remark, "SOA normal forces, step %d\n", step);
3821  TestArray_write<double>(fname, remark, fxyz, 3*numAtoms);
3822 
3823  delete[] fxyz;
3824  }
3825 #endif
3826 
3827 #if 0
3828  //Will print forces here after runComputeObjects
3829  if(nstep == 1){
3830  fprintf(stderr, "CPU force arrays for alanin\n" );
3831  for(int i = 0; i < patch->patchDataSOA.numAtoms; i++){
3832  fprintf(stderr, "f[%i] = %lf %lf %lf | %lf %lf %lf | %lf %lf %lf\n", i,
3833  patch->patchDataSOA.f_normal_x[i], patch->patchDataSOA.f_normal_y[i], patch->patchDataSOA.f_normal_z[i],
3834  patch->patchDataSOA.f_nbond_x[i], patch->patchDataSOA.f_nbond_y[i], patch->patchDataSOA.f_nbond_z[i],
3835  patch->patchDataSOA.f_slow_x[i], patch->patchDataSOA.f_slow_y[i], patch->patchDataSOA.f_slow_z[i]);
3836  }
3837  }
3838 #endif
3839 
// NOTE(review): listing line 3840 (the opening `if (...) {` that guards the
// two statements below, presumably testing flags.savePairlists) is absent
// from this extraction.
3841  pairlistsAreValid = 1;
3842  pairlistsAge = 0;
3843  }
3844  // For multigrator, do not age pairlist during pressure step
3845  // NOTE: for non-multigrator pressureStep = 0 always
3846  if ( pairlistsAreValid /* && !pressureStep */ ) ++pairlistsAge;
3847 
3848  // XXX missing lonepairs
3849  // XXX missing Molly
3850  // XXX missing Lowe-Andersen
3851 }
3852 
3858 {
// NOTE(review): the signature and opening condition of this method (listing
// lines 3853-3857 and 3859-3860) are absent from this extraction.  From the
// body this is presumably Sequencer::stochRescaleVelocities_SOA(int step),
// the SOA stochastic velocity-rescaling thermostat -- confirm upstream.
3861  double * __restrict vel_x = patch->patchDataSOA.vel_x;
3862  double * __restrict vel_y = patch->patchDataSOA.vel_y;
3863  double * __restrict vel_z = patch->patchDataSOA.vel_z;
3864  int numAtoms = patch->patchDataSOA.numAtoms;
3865  // Blocking receive for the temperature coupling coefficient.
3866  BigReal velrescaling = broadcast->stochRescaleCoefficient.get(step);
3867  DebugM(4, "stochastically rescaling velocities at step " << step << " by " << velrescaling << "\n");
// Apply the single scalar rescaling coefficient to every velocity component.
3868  for ( int i = 0; i < numAtoms; ++i ) {
3869  vel_x[i] *= velrescaling;
3870  vel_y[i] *= velrescaling;
3871  vel_z[i] *= velrescaling;
3872  }
// Reset the counter tracking steps since the last rescaling.
3873  stochRescale_count = 0;
3874  }
3875 }
3876 
3877 //
3878 // end SOA code
3879 //
3881 
3882 #endif // SEQUENCER_SOA
3883 
3884 
3885 extern int eventEndOfTimeStep;
3886 
// Main velocity-Verlet integration driver for this patch (AOS data path).
// For SCRIPT_RUN it first performs the startup sequence (initial RATTLE,
// initial force evaluation, half-kick bookkeeping for initial energies),
// then loops over timesteps: thermostat/barostat updates, half-kick, drift,
// force computation via runComputeObjects(), second half-kick, constraint
// enforcement, and reduction/collection submission.  The thread of execution
// may suspend inside runComputeObjects(), langevinPiston(), and
// correctMomentum().
// NOTE(review): this doxygen-derived listing is missing a number of source
// lines (e.g. 3955, 3967-3971, 3994, 4022-4023, 4058, 4145, 4148, 4189,
// 4222, 4227); braces below may appear unbalanced as a result -- consult
// upstream Sequencer.C before editing.
3887 void Sequencer::integrate(int scriptTask) {
3888  char traceNote[24];
3889  char tracePrefix[20];
3890  sprintf(tracePrefix, "p:%d,s:",patch->patchID);
3891 // patch->write_tip4_props();
3892 
3893  //
3894  // DJH: Copy all data into SOA (structure of arrays)
3895  // from AOS (array of structures) data structure.
3896  //
3897  //patch->copy_all_to_SOA();
3898 
3899 #ifdef TIMER_COLLECTION
3900  TimerSet& t = patch->timerSet;
3901 #endif
// Per-phase histogram timers (no-ops unless TIMER_COLLECTION is defined).
3902  TIMER_INIT_WIDTH(t, KICK, simParams->timerBinWidth);
3903  TIMER_INIT_WIDTH(t, MAXMOVE, simParams->timerBinWidth);
3904  TIMER_INIT_WIDTH(t, DRIFT, simParams->timerBinWidth);
3905  TIMER_INIT_WIDTH(t, PISTON, simParams->timerBinWidth);
3906  TIMER_INIT_WIDTH(t, SUBMITHALF, simParams->timerBinWidth);
3907  TIMER_INIT_WIDTH(t, VELBBK1, simParams->timerBinWidth);
3908  TIMER_INIT_WIDTH(t, VELBBK2, simParams->timerBinWidth);
3909  TIMER_INIT_WIDTH(t, RATTLE1, simParams->timerBinWidth);
3910  TIMER_INIT_WIDTH(t, SUBMITFULL, simParams->timerBinWidth);
3911  TIMER_INIT_WIDTH(t, SUBMITCOLLECT, simParams->timerBinWidth);
3912 
3913  int &step = patch->flags.step;
3914  step = simParams->firstTimestep;
3915 
3916  // drag switches
3917  const Bool rotDragOn = simParams->rotDragOn;
3918  const Bool movDragOn = simParams->movDragOn;
3919 
3920  const int commOnly = simParams->commOnly;
3921 
3922  int &maxForceUsed = patch->flags.maxForceUsed;
3923  int &maxForceMerged = patch->flags.maxForceMerged;
3924  maxForceUsed = Results::normal;
3925  maxForceMerged = Results::normal;
3926 
3927  const int numberOfSteps = simParams->N;
3928  const int stepsPerCycle = simParams->stepsPerCycle;
3929  const BigReal timestep = simParams->dt;
3930 
3931  // what MTS method?
3932  const int staleForces = ( simParams->MTSAlgorithm == NAIVE );
3933 
// Multiple-timestepping setup: nonbonded forces are evaluated every
// nonbondedFrequency steps with a correspondingly scaled timestep.
3934  const int nonbondedFrequency = simParams->nonbondedFrequency;
3935  slowFreq = nonbondedFrequency;
3936  const BigReal nbondstep = timestep * (staleForces?1:nonbondedFrequency);
3937  int &doNonbonded = patch->flags.doNonbonded;
3938  doNonbonded = (step >= numberOfSteps) || !(step%nonbondedFrequency);
3939  if ( nonbondedFrequency == 1 ) maxForceMerged = Results::nbond;
3940  if ( doNonbonded ) maxForceUsed = Results::nbond;
3941 
3942  // Do we do full electrostatics?
3943  const int dofull = ( simParams->fullElectFrequency ? 1 : 0 );
3944  const int fullElectFrequency = simParams->fullElectFrequency;
3945  if ( dofull ) slowFreq = fullElectFrequency;
3946  const BigReal slowstep = timestep * (staleForces?1:fullElectFrequency);
3947  int &doFullElectrostatics = patch->flags.doFullElectrostatics;
3948  doFullElectrostatics = (dofull && ((step >= numberOfSteps) || !(step%fullElectFrequency)));
3949  if ( dofull && (fullElectFrequency == 1) && !(simParams->mollyOn) )
3950  maxForceMerged = Results::slow;
3951  if ( doFullElectrostatics ) maxForceUsed = Results::slow;
3952 
3953  // If doing LJ-PME, track doFullElectrostatics
3954  int &doFullDispersion = patch->flags.doFullDispersion;
// NOTE(review): listing line 3955 is absent from this extraction; the
// closing brace two lines below pairs with a construct on that missing line.
3956  doFullDispersion = (simParams->LJPMEOn && doFullElectrostatics);
3957 }
3958 
3959 //#ifndef UPPER_BOUND
3960  const Bool accelMDOn = simParams->accelMDOn;
3961  const Bool accelMDdihe = simParams->accelMDdihe;
3962  const Bool accelMDdual = simParams->accelMDdual;
3963  if ( accelMDOn && (accelMDdihe || accelMDdual)) maxForceUsed = Results::amdf;
3964 
3965  // Is adaptive tempering on?
3966  const Bool adaptTempOn = simParams->adaptTempOn;
// NOTE(review): listing lines 3967, 3969, and 3971 (the bodies/continuations
// of the two conditionals below) are absent from this extraction.
3968  if (simParams->langevinOn)
3970  else if (simParams->rescaleFreq > 0)
3972 
3973 
3974  int &doMolly = patch->flags.doMolly;
3975  doMolly = simParams->mollyOn && doFullElectrostatics;
3976  // BEGIN LA
3977  int &doLoweAndersen = patch->flags.doLoweAndersen;
3978  doLoweAndersen = simParams->loweAndersenOn && doNonbonded;
3979  // END LA
3980 
3981  int &doGBIS = patch->flags.doGBIS;
3982  doGBIS = simParams->GBISOn;
3983 
3984  int &doLCPO = patch->flags.doLCPO;
3985  doLCPO = simParams->LCPOOn;
3986 
3987  int zeroMomentum = simParams->zeroMomentum;
3988 
3989  // Do we need to return forces to TCL script or Colvar module?
3990  int doTcl = simParams->tclForcesOn;
3991  int doColvars = simParams->colvarsOn;
3992 //#endif
3993  int doGlobal = doTcl || doColvars;
3995 
3996  // Bother to calculate energies?
3997  int &doEnergy = patch->flags.doEnergy;
3998  int energyFrequency = simParams->computeEnergies;
3999 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
4000  if(simParams->alchOn) energyFrequency = NAMD_gcd(energyFrequency, simParams->alchOutFreq);
4001 #endif
4002 #ifndef UPPER_BOUND
4003  const int reassignFreq = simParams->reassignFreq;
4004 #endif
4005 
4006  int &doVirial = patch->flags.doVirial;
4007  doVirial = 1;
4008 
// ---- Startup sequence: initial constraints, first force evaluation, and
// ---- the kick bookkeeping needed to report step-0 energies.
4009  if ( scriptTask == SCRIPT_RUN ) {
4010 
4011 // print_vel_AOS(patch->atom.begin(), 0, patch->numAtoms);
4012 
4013 #ifndef UPPER_BOUND
4014 // printf("Doing initial rattle\n");
4015 #ifndef UPPER_BOUND
4016 D_MSG("rattle1()");
4017  TIMER_START(t, RATTLE1);
4018  rattle1(0.,0); // enforce rigid bond constraints on initial positions
4019  TIMER_STOP(t, RATTLE1);
4020 #endif
4021 
// NOTE(review): listing lines 4022-4023 (the conditional and call whose
// argument list continues below, presumably lone-pair repositioning) are
// absent from this extraction.
4024  patch->atom.begin(),patch->atom.end());
4025  }
4026 
4027  if ( !commOnly && ( reassignFreq>0 ) && ! (step%reassignFreq) ) {
4028  reassignVelocities(timestep,step);
4029  }
4030 #endif
4031 
4032  doEnergy = ! ( step % energyFrequency );
4033 #ifndef UPPER_BOUND
4034  if ( accelMDOn && !accelMDdihe ) doEnergy=1;
4035  //Update energy every timestep for adaptive tempering
4036  if ( adaptTempOn ) doEnergy=1;
4037 #endif
4038 // print_vel_AOS(patch->atom.begin(), 0, patch->numAtoms);
4039 D_MSG("runComputeObjects()");
4040  runComputeObjects(1,step<numberOfSteps); // must migrate here!
4041 #ifndef UPPER_BOUND
4042  rescaleaccelMD(step, doNonbonded, doFullElectrostatics); // for accelMD
4043  adaptTempUpdate(step); // update adaptive tempering temperature
4044 #endif
4045 
4046 #ifndef UPPER_BOUND
4047  if ( staleForces || doGlobal ) {
4048  if ( doNonbonded ) saveForce(Results::nbond);
4049  if ( doFullElectrostatics ) saveForce(Results::slow);
4050  }
4051 // print_vel_AOS(patch->atom.begin(), 0, patch->numAtoms);
// Half-kick backward then forward around the initial state so that the
// half-step and full-step energies reported for step 0 are consistent.
4052  if ( ! commOnly ) {
4053 D_MSG("newtonianVelocities()");
4054  TIMER_START(t, KICK);
4055  newtonianVelocities(-0.5,timestep,nbondstep,slowstep,0,1,1);
4056  TIMER_STOP(t, KICK);
4057  }
4059 // print_vel_AOS(patch->atom.begin(), 0, patch->numAtoms);
4060 #ifndef UPPER_BOUND
4061 D_MSG("rattle1()");
4062  TIMER_START(t, RATTLE1);
4063  rattle1(-timestep,0);
4064  TIMER_STOP(t, RATTLE1);
4065 #endif
4066 // print_vel_AOS(patch->atom.begin(), 0, patch->numAtoms);
4067 D_MSG("submitHalfstep()");
4068  TIMER_START(t, SUBMITHALF);
4069  submitHalfstep(step);
4070  TIMER_STOP(t, SUBMITHALF);
4071 // print_vel_AOS(patch->atom.begin(), 0, patch->numAtoms);
4072  if ( ! commOnly ) {
4073 D_MSG("newtonianVelocities()");
4074  TIMER_START(t, KICK);
4075  newtonianVelocities(1.0,timestep,nbondstep,slowstep,0,1,1);
4076  TIMER_STOP(t, KICK);
4077  }
4078 // print_vel_AOS(patch->atom.begin(), 0, patch->numAtoms);
4079 D_MSG("rattle1()");
4080  TIMER_START(t, RATTLE1);
4081  rattle1(timestep,1);
4082  TIMER_STOP(t, RATTLE1);
4083  if (doGlobal) // include constraint forces
4084  computeGlobal->saveTotalForces(patch);
4085 // print_vel_AOS(patch->atom.begin(), 0, patch->numAtoms);
4086 D_MSG("submitHalfstep()");
4087  TIMER_START(t, SUBMITHALF);
4088  submitHalfstep(step);
4089  TIMER_STOP(t, SUBMITHALF);
4090  if ( zeroMomentum && doFullElectrostatics ) submitMomentum(step);
4091  if ( ! commOnly ) {
4092 D_MSG("newtonianVelocities()");
4093  TIMER_START(t, KICK);
4094  newtonianVelocities(-0.5,timestep,nbondstep,slowstep,0,1,1);
4095  TIMER_STOP(t, KICK);
4096  }
4097 // print_vel_AOS(patch->atom.begin(), 0, patch->numAtoms);
4098 #endif
4099 D_MSG("submitReductions()");
4100  TIMER_START(t, SUBMITFULL);
4101  submitReductions(step);
4102  TIMER_STOP(t, SUBMITFULL);
4103 // print_vel_AOS(patch->atom.begin(), 0, patch->numAtoms);
4104 #ifndef UPPER_BOUND
4105  if(0){ // if(traceIsOn()){
4106  traceUserEvent(eventEndOfTimeStep);
4107  sprintf(traceNote, "%s%d",tracePrefix,step);
4108  traceUserSuppliedNote(traceNote);
4109  }
4110 #endif
4111  rebalanceLoad(step);
4112 
4113  } // scriptTask == SCRIPT_RUN
4114 
4115 #ifndef UPPER_BOUND
4116  bool doMultigratorRattle = false;
4117 #endif
4118 
4119  //
4120  // DJH: There are a lot of mod operations below and elsewhere to
4121  // test step number against the frequency of something happening.
4122  // Mod and integer division are expensive!
4123  // Might be better to replace with counters and test equality.
4124  //
4125 #if 0
4126  for(int i = 0; i < NamdProfileEvent::EventsCount; i++)
4127  CkPrintf("-------------- [%d] %s -------------\n", i, NamdProfileEventStr[i]);
4128 #endif
4129 
// Profiling-event gating: only emit ranges for a configured patch-ID and
// step window; patch 0 toggles the profiler on/off at the window edges.
4130 #if defined(NAMD_NVTX_ENABLED) || defined(NAMD_CMK_TRACE_ENABLED) || defined(NAMD_ROCTX_ENABLED)
4131  int& eon = patch->flags.event_on;
4132  int epid = (simParams->beginEventPatchID <= patch->getPatchID()
4133  && patch->getPatchID() <= simParams->endEventPatchID);
4134  int beginStep = simParams->beginEventStep;
4135  int endStep = simParams->endEventStep;
4136  bool controlProfiling = patch->getPatchID() == 0;
4137 #endif
4138 
// ---- Main timestep loop.
4139  for ( ++step; step <= numberOfSteps; ++step )
4140  {
4141 #if defined(NAMD_NVTX_ENABLED) || defined(NAMD_CMK_TRACE_ENABLED) || defined(NAMD_ROCTX_ENABLED)
4142  eon = epid && (beginStep < step && step <= endStep);
4143 
// NOTE(review): listing lines 4145 and 4148 (the bodies of these two
// profiler start/stop conditionals) are absent from this extraction.
4144  if (controlProfiling && step == beginStep) {
4146  }
4147  if (controlProfiling && step == endStep) {
4149  }
4150  char buf[32];
4151  sprintf(buf, "%s: %d", NamdProfileEventStr[NamdProfileEvent::INTEGRATE_1], patch->getPatchID());
4152  NAMD_EVENT_START_EX(eon, NamdProfileEvent::INTEGRATE_1, buf);
4153 #endif
4154  DebugM(3,"for step "<<step<< " dGlobal " << doGlobal<<"\n"<<endi);
4155 #ifndef UPPER_BOUND
// Thermostat/barostat updates that apply at the start of the step.
4156  rescaleVelocities(step);
4157  tcoupleVelocities(timestep,step);
4158  if ( simParams->stochRescaleOn ) {
4159  stochRescaleVelocities(step);
4160  }
4161  berendsenPressure(step);
4162 
// First velocity half-kick of the velocity-Verlet step.
4163  if ( ! commOnly ) {
4164  TIMER_START(t, KICK);
4165  newtonianVelocities(0.5,timestep,nbondstep,slowstep,staleForces,doNonbonded,doFullElectrostatics);
4166  TIMER_STOP(t, KICK);
4167  }
4168 
4169  // We do RATTLE here if multigrator thermostat was applied in the previous step
4170  if (doMultigratorRattle) rattle1(timestep, 1);
4171 
4172  /* reassignment based on half-step velocities
4173  if ( !commOnly && ( reassignFreq>0 ) && ! (step%reassignFreq) ) {
4174  addVelocityToPosition(0.5*timestep);
4175  reassignVelocities(timestep,step);
4176  addVelocityToPosition(0.5*timestep);
4177  rattle1(0.,0);
4178  rattle1(-timestep,0);
4179  addVelocityToPosition(-1.0*timestep);
4180  rattle1(timestep,0);
4181  } */
4182 
4183  TIMER_START(t, MAXMOVE);
4184  maximumMove(timestep);
4185  TIMER_STOP(t, MAXMOVE);
4186 
4187  NAMD_EVENT_STOP(eon, NamdProfileEvent::INTEGRATE_1); // integrate 1
4188 
// NOTE(review): listing line 4189 (the opening `if (...)` that pairs with
// the `} else {` at listing line 4208 below, presumably selecting the
// Langevin-piston / BAOAB split-drift branch) is absent from this extraction.
4190  if ( ! commOnly ) {
4191  TIMER_START(t, DRIFT);
4192  addVelocityToPosition(0.5*timestep);
4193  TIMER_STOP(t, DRIFT);
4194  }
4195  // We add an Ornstein-Uhlenbeck integration step for the case of BAOAB (Langevin)
4196  langevinVelocities(timestep);
4197 
4198  // There is a blocking receive inside of langevinPiston()
4199  // that might suspend the current thread of execution,
4200  // so split profiling around this conditional block.
4201  langevinPiston(step);
4202 
4203  if ( ! commOnly ) {
4204  TIMER_START(t, DRIFT);
4205  addVelocityToPosition(0.5*timestep);
4206  TIMER_STOP(t, DRIFT);
4207  }
4208  } else {
4209  // If Langevin is not used, take full time step directly instread of two half steps
4210  if ( ! commOnly ) {
4211  TIMER_START(t, DRIFT);
4212  addVelocityToPosition(timestep);
4213  TIMER_STOP(t, DRIFT);
4214  }
4215  }
4216 
4217  NAMD_EVENT_START(eon, NamdProfileEvent::INTEGRATE_2);
4218 
4219  // impose hard wall potential for Drude bond length
4220  hardWallDrude(timestep, 1);
4221 
// NOTE(review): listing line 4222 is absent from this extraction.
4223 #endif // UPPER_BOUND
4224 
// Decide which force classes are evaluated this step (MTS schedule).
4225  doNonbonded = !(step%nonbondedFrequency);
4226  doFullElectrostatics = (dofull && !(step%fullElectFrequency));
// NOTE(review): listing line 4227 (the opening of the construct closed by
// the brace at listing line 4230) is absent from this extraction.
4228  // XXX in preparation for supporting LJ-PME with MTS
4229  doFullDispersion = (simParams->LJPMEOn && doFullElectrostatics);
4230 }
4231 
4232 #ifndef UPPER_BOUND
4233  if ( zeroMomentum && doFullElectrostatics ) {
4234  // There is a blocking receive inside of correctMomentum().
4235  correctMomentum(step,slowstep);
4236  }
4237 
4238  // There are NO sends in submitHalfstep() just local summation
4239  // into the Reduction struct.
4240  TIMER_START(t, SUBMITHALF);
4241  submitHalfstep(step);
4242  TIMER_STOP(t, SUBMITHALF);
4243 
4244  doMolly = simParams->mollyOn && doFullElectrostatics;
4245  // BEGIN LA
4246  doLoweAndersen = simParams->loweAndersenOn && doNonbonded;
4247  // END LA
4248 
4249  maxForceUsed = Results::normal;
4250  if ( doNonbonded ) maxForceUsed = Results::nbond;
4251  if ( doFullElectrostatics ) maxForceUsed = Results::slow;
4252  if ( accelMDOn && (accelMDdihe || accelMDdual)) maxForceUsed = Results::amdf;
4253 
4254  // Migrate Atoms on stepsPerCycle
4255  doEnergy = ! ( step % energyFrequency );
4256  if ( accelMDOn && !accelMDdihe ) doEnergy=1;
4257  if ( adaptTempOn ) doEnergy=1;
4258 
4259  // Multigrator
4260  if (simParams->multigratorOn) {
4261  doVirial = (!(step % energyFrequency) || ((simParams->outputPressure > 0) && !(step % simParams->outputPressure))
4262  || !(step % simParams->multigratorPressureFreq));
4263  doKineticEnergy = (!(step % energyFrequency) || !(step % simParams->multigratorTemperatureFreq));
4264  doMomenta = (simParams->outputMomenta > 0) && !(step % simParams->outputMomenta);
4265  } else {
4266  doVirial = 1;
4267  doKineticEnergy = 1;
4268  doMomenta = 1;
4269  }
4270 #endif
4271  NAMD_EVENT_STOP(eon, NamdProfileEvent::INTEGRATE_2); // integrate 2
4272 
4273  // The current thread of execution will suspend in runComputeObjects().
4274  runComputeObjects(!(step%stepsPerCycle),step<numberOfSteps);
4275 
4276  NAMD_EVENT_START(eon, NamdProfileEvent::INTEGRATE_3);
4277 
4278 #ifndef UPPER_BOUND
4279  rescaleaccelMD(step, doNonbonded, doFullElectrostatics); // for accelMD
4280 
4281  if ( staleForces || doGlobal ) {
4282  if ( doNonbonded ) saveForce(Results::nbond);
4283  if ( doFullElectrostatics ) saveForce(Results::slow);
4284  }
4285 
4286  // reassignment based on full-step velocities
4287  if ( !commOnly && ( reassignFreq>0 ) && ! (step%reassignFreq) ) {
4288  reassignVelocities(timestep,step);
4289  newtonianVelocities(-0.5,timestep,nbondstep,slowstep,staleForces,doNonbonded,doFullElectrostatics);
4290  rattle1(-timestep,0);
4291  }
4292 
// Second half-kick, bracketed by the two BBK Langevin-damping half-updates.
4293  if ( ! commOnly ) {
4294  TIMER_START(t, VELBBK1);
4295  langevinVelocitiesBBK1(timestep);
4296  TIMER_STOP(t, VELBBK1);
4297  TIMER_START(t, KICK);
4298  newtonianVelocities(1.0,timestep,nbondstep,slowstep,staleForces,doNonbonded,doFullElectrostatics);
4299  TIMER_STOP(t, KICK);
4300  TIMER_START(t, VELBBK2);
4301  langevinVelocitiesBBK2(timestep);
4302  TIMER_STOP(t, VELBBK2);
4303  }
4304 
4305  // add drag to each atom's positions
4306  if ( ! commOnly && movDragOn ) addMovDragToPosition(timestep);
4307  if ( ! commOnly && rotDragOn ) addRotDragToPosition(timestep);
4308 
4309  TIMER_START(t, RATTLE1);
4310  rattle1(timestep,1);
4311  TIMER_STOP(t, RATTLE1);
4312  if (doGlobal) // include constraint forces
4313  computeGlobal->saveTotalForces(patch);
4314 
4315  TIMER_START(t, SUBMITHALF);
4316  submitHalfstep(step);
4317  TIMER_STOP(t, SUBMITHALF);
4318  if ( zeroMomentum && doFullElectrostatics ) submitMomentum(step);
4319 
// Final backward half-kick leaves velocities at the full-step point for output.
4320  if ( ! commOnly ) {
4321  TIMER_START(t, KICK);
4322  newtonianVelocities(-0.5,timestep,nbondstep,slowstep,staleForces,doNonbonded,doFullElectrostatics);
4323  TIMER_STOP(t, KICK);
4324  }
4325 
4326  // rattle2(timestep,step);
4327 #endif
4328 
4329  TIMER_START(t, SUBMITFULL);
4330  submitReductions(step);
4331  TIMER_STOP(t, SUBMITFULL);
4332  TIMER_START(t, SUBMITCOLLECT);
4333  submitCollections(step);
4334  TIMER_STOP(t, SUBMITCOLLECT);
4335 #ifndef UPPER_BOUND
4336  //Update adaptive tempering temperature
4337  adaptTempUpdate(step);
4338 
4339  // Multigrator temperature and pressure steps
4340  multigratorTemperature(step, 1);
4341  multigratorPressure(step, 1);
4342  multigratorPressure(step, 2);
4343  multigratorTemperature(step, 2);
4344  doMultigratorRattle = (simParams->multigratorOn && !(step % simParams->multigratorTemperatureFreq));
4345 
4346  NAMD_EVENT_STOP(eon, NamdProfileEvent::INTEGRATE_3); // integrate 3
4347 #endif
4348 
4349 #if CYCLE_BARRIER
4350  cycleBarrier(!((step+1) % stepsPerCycle), step);
4351 #elif PME_BARRIER
4352  cycleBarrier(doFullElectrostatics, step);
4353 #elif STEP_BARRIER
4354  cycleBarrier(1, step);
4355 #endif
4356 
4357 #ifndef UPPER_BOUND
4358  if(Node::Object()->specialTracing || simParams->statsOn){
4359  int bstep = simParams->traceStartStep;
4360  int estep = bstep + simParams->numTraceSteps;
4361  if(step == bstep || step == estep){
4362  traceBarrier(step);
4363  }
4364  }
4365 
4366 #ifdef MEASURE_NAMD_WITH_PAPI
4367  if(simParams->papiMeasure) {
4368  int bstep = simParams->papiMeasureStartStep;
4369  int estep = bstep + simParams->numPapiMeasureSteps;
4370  if(step == bstep || step==estep) {
4371  papiMeasureBarrier(step);
4372  }
4373  }
4374 #endif
4375 
4376  if(0){ // if(traceIsOn()){
4377  traceUserEvent(eventEndOfTimeStep);
4378  sprintf(traceNote, "%s%d",tracePrefix,step);
4379  traceUserSuppliedNote(traceNote);
4380  }
4381 #endif // UPPER_BOUND
4382  rebalanceLoad(step);
4383 
4384 #if PME_BARRIER
4385  // a step before PME
4386  cycleBarrier(dofull && !((step+1)%fullElectFrequency),step);
4387 #endif
4388 
4389 #if USE_HPM
4390  if(step == START_HPM_STEP)
4391  (CProxy_Node(CkpvAccess(BOCclass_group).node)).startHPM();
4392 
4393  if(step == STOP_HPM_STEP)
4394  (CProxy_Node(CkpvAccess(BOCclass_group).node)).stopHPM();
4395 #endif
4396 
4397  }
4398 
4399  TIMER_DONE(t);
4400 #ifdef TIMER_COLLECTION
4401  if (patch->patchID == SPECIAL_PATCH_ID) {
4402  printf("Timer collection reporting in microseconds for "
4403  "Patch %d\n", patch->patchID);
4404  TIMER_REPORT(t);
4405  }
4406 #endif // TIMER_COLLECTION
4407  //
4408  // DJH: Copy updates of SOA back into AOS.
4409  //
4410  //patch->copy_updates_to_AOS();
4411 }
4412 
4413 // add moving drag to each atom's position
// NOTE(review): the signature line (listing line 4414) is absent from this
// extraction; presumably `void Sequencer::addMovDragToPosition(BigReal
// timestep)` -- confirm against upstream Sequencer.C.
// Displaces each dragged atom by its per-atom drag velocity (scaled by the
// global drag speed) times the timestep.  Fixed atoms and atoms without a
// moving-drag attribute are skipped.
4415  FullAtom *atom = patch->atom.begin();
4416  int numAtoms = patch->numAtoms;
4417  Molecule *molecule = Node::Object()->molecule; // need its methods
4418  const BigReal movDragGlobVel = simParams->movDragGlobVel;
4419  const BigReal dt = timestep / TIMEFACTOR; // MUST be as in the integrator!
4420  Vector movDragVel, dragIncrement;
4421  for ( int i = 0; i < numAtoms; ++i )
4422  {
4423  // skip if fixed atom or zero drag attribute
4424  if ( (simParams->fixedAtomsOn && atom[i].atomFixed)
4425  || !(molecule->is_atom_movdragged(atom[i].id)) ) continue;
4426  molecule->get_movdrag_params(movDragVel, atom[i].id);
4427  dragIncrement = movDragGlobVel * movDragVel * dt;
4428  atom[i].position += dragIncrement;
4429  }
4430 }
4431 
4432 // add rotating drag to each atom's position
// NOTE(review): the signature line (listing line 4433) is absent from this
// extraction; presumably `void Sequencer::addRotDragToPosition(BigReal
// timestep)` -- confirm against upstream Sequencer.C.
// Rotates each dragged atom about its configured axis/pivot by a small angle
// proportional to its drag angular velocity and the timestep, applied as a
// linearized tangential displacement (cross product).  Fixed atoms and atoms
// without a rotating-drag attribute are skipped.
4434  FullAtom *atom = patch->atom.begin();
4435  int numAtoms = patch->numAtoms;
4436  Molecule *molecule = Node::Object()->molecule; // need its methods
4437  const BigReal rotDragGlobVel = simParams->rotDragGlobVel;
4438  const BigReal dt = timestep / TIMEFACTOR; // MUST be as in the integrator!
4439  BigReal rotDragVel, dAngle;
4440  Vector atomRadius;
4441  Vector rotDragAxis, rotDragPivot, dragIncrement;
4442  for ( int i = 0; i < numAtoms; ++i )
4443  {
4444  // skip if fixed atom or zero drag attribute
4445  if ( (simParams->fixedAtomsOn && atom[i].atomFixed)
4446  || !(molecule->is_atom_rotdragged(atom[i].id)) ) continue;
4447  molecule->get_rotdrag_params(rotDragVel, rotDragAxis, rotDragPivot, atom[i].id);
4448  dAngle = rotDragGlobVel * rotDragVel * dt;
// Normalize the axis; tangential displacement = (axis x radius) * dAngle.
4449  rotDragAxis /= rotDragAxis.length();
4450  atomRadius = atom[i].position - rotDragPivot;
4451  dragIncrement = cross(rotDragAxis, atomRadius) * dAngle;
4452  atom[i].position += dragIncrement;
4453  }
4454 }
4455 
// Energy-minimization driver for this patch (conjugate-gradient style, with
// an initial "downhill" phase that only relaxes bad contacts).
// NOTE(review): the signature (listing line 4456, presumably
// `void Sequencer::minimize()`) and listing lines 4517-4518, 4536, and 4585
// are absent from this doxygen-derived extraction, so some statements below
// appear truncated -- consult upstream Sequencer.C before editing.
4457  //
4458  // DJH: Copy all data into SOA (structure of arrays)
4459  // from AOS (array of structures) data structure.
4460  //
4461  //patch->copy_all_to_SOA();
4462 
4463  const int numberOfSteps = simParams->N;
4464  const int stepsPerCycle = simParams->stepsPerCycle;
4465 #if 0 && defined(NODEGROUP_FORCE_REGISTER)
4466  // XXX DJH: This is a hack that is found to get GPU nonbonded
4467  // force calculation right for --with-single-node-cuda builds
4468  const int stepsPerCycle_save = stepsPerCycle;
4469  simParams->stepsPerCycle = 1;
4470 #endif
4471  int &step = patch->flags.step;
4472  step = simParams->firstTimestep;
4473 
// Minimization evaluates every force class every step.
4474  int &maxForceUsed = patch->flags.maxForceUsed;
4475  int &maxForceMerged = patch->flags.maxForceMerged;
4476  maxForceUsed = Results::normal;
4477  maxForceMerged = Results::normal;
4478  int &doNonbonded = patch->flags.doNonbonded;
4479  doNonbonded = 1;
4480  maxForceUsed = Results::nbond;
4481  maxForceMerged = Results::nbond;
4482  const int dofull = ( simParams->fullElectFrequency ? 1 : 0 );
4483  int &doFullElectrostatics = patch->flags.doFullElectrostatics;
4484  doFullElectrostatics = dofull;
4485  if ( dofull ) {
4486  maxForceMerged = Results::slow;
4487  maxForceUsed = Results::slow;
4488  }
4489  int &doMolly = patch->flags.doMolly;
4490  doMolly = simParams->mollyOn && doFullElectrostatics;
4491  int &doMinimize = patch->flags.doMinimize;
4492  doMinimize = 1;
4493  // BEGIN LA
4494  int &doLoweAndersen = patch->flags.doLoweAndersen;
4495  doLoweAndersen = 0;
4496  // END LA
4497 
4498  int &doGBIS = patch->flags.doGBIS;
4499  doGBIS = simParams->GBISOn;
4500 
4501  int &doLCPO = patch->flags.doLCPO;
4502  doLCPO = simParams->LCPOOn;
4503 
4504  int doTcl = simParams->tclForcesOn;
4505  int doColvars = simParams->colvarsOn;
4506  int doGlobal = doTcl || doColvars;
4508 
4509  int &doEnergy = patch->flags.doEnergy;
4510  doEnergy = 1;
4511 
4512  // Do this to stabilize the minimizer, whether or not the user
4513  // wants rigid bond constraints enabled for dynamics.
4514  // In order to enforce, we have to call HomePatch::rattle1() directly.
4515  patch->rattle1(0.,0,0); // enforce rigid bond constraints on initial positions
4516 
// NOTE(review): listing lines 4517-4518 (the conditional and call whose
// argument list continues below, presumably lone-pair repositioning) are
// absent from this extraction.
4519  patch->atom.begin(),patch->atom.end());
4520  }
4521 
4522  runComputeObjects(1,step<numberOfSteps); // must migrate here!
4523 
4524  if ( doGlobal ) {
4525 #ifdef DEBUG_MINIMIZE
4526  printf("doTcl = %d doColvars = %d\n", doTcl, doColvars);
4527 #endif
4528  if ( doNonbonded ) saveForce(Results::nbond);
4529  if ( doFullElectrostatics ) saveForce(Results::slow);
4530  computeGlobal->saveTotalForces(patch);
4531  }
4532 #ifdef DEBUG_MINIMIZE
4533  else { printf("No computeGlobal\n"); }
4534 #endif
4535 
// NOTE(review): listing line 4536 (presumably the declaration/initialization
// of the force-threshold variable `fmax2` used below) is absent from this
// extraction.
4537 
4538  submitMinimizeReductions(step,fmax2);
4539  rebalanceLoad(step);
4540 
4541  int downhill = 1; // start out just fixing bad contacts
4542  int minSeq = 0;
// Main minimization loop: coefficients arrive from the Controller via
// broadcast; c==0 signals a phase change (end of downhill / new direction).
4543  for ( ++step; step <= numberOfSteps; ++step ) {
4544  // Blocking receive for the minimization coefficient.
4545  BigReal c = broadcast->minimizeCoefficient.get(minSeq++);
4546 
4547  if ( downhill ) {
4548  if ( c ) minimizeMoveDownhill(fmax2);
4549  else {
4550  downhill = 0;
4551  fmax2 *= 10000.;
4552  }
4553  }
4554  if ( ! downhill ) {
4555  if ( ! c ) { // new direction
4556 
4557  // Blocking receive for the minimization coefficient.
4558  c = broadcast->minimizeCoefficient.get(minSeq++);
4559 
4560  newMinimizeDirection(c); // v = c * v + f
4561 
4562  // Blocking receive for the minimization coefficient.
4563  c = broadcast->minimizeCoefficient.get(minSeq++);
4564 
4565  } // same direction
4566  newMinimizePosition(c); // x = x + c * v
4567  }
4568 
4569  runComputeObjects(!(step%stepsPerCycle),step<numberOfSteps);
4570  if ( doGlobal ) {
4571  if ( doNonbonded ) saveForce(Results::nbond);
4572  if ( doFullElectrostatics ) saveForce(Results::slow);
4573  computeGlobal->saveTotalForces(patch);
4574  }
4575  submitMinimizeReductions(step,fmax2);
4576  submitCollections(step, 1); // write out zeros for velocities
4577  rebalanceLoad(step);
4578  }
4579  quenchVelocities(); // zero out bogus velocity
4580 
4581  doMinimize = 0;
4582 
4583 #if 0
4584  // when using CUDASOAintegrate, need to update SOA data structures
// NOTE(review): listing line 4585 (the opening conditional of this disabled
// block) is absent from this extraction.
4586  patch->copy_atoms_to_SOA();
4587  }
4588 #endif
4589 
4590 #if 0 && defined(NODEGROUP_FORCE_REGISTER)
4591  // XXX DJH: all patches in a PE are writing into simParams
4592  // so this hack needs a guard
4593  simParams->stepsPerCycle = stepsPerCycle_save;
4594 #endif
4595  //
4596  // DJH: Copy updates of SOA back into AOS.
4597  //
4598  //patch->copy_updates_to_AOS();
4599 }
4600 
4601 // x = x + 0.1 * unit(f) for large f
// NOTE(review): the signature (listing line 4602) is absent from this
// extraction; presumably `void Sequencer::minimizeMoveDownhill(BigReal
// fmax2)` -- confirm against upstream Sequencer.C.
// For every atom whose net force squared exceeds fmax2, move it a fixed small
// distance (0.1 length units) along the force direction; the atoms of its
// hydrogen group move by the same increment so the group stays intact.
4603 
4604  FullAtom *a = patch->atom.begin();
4605  Force *f1 = patch->f[Results::normal].begin(); // includes nbond and slow
4606  int numAtoms = patch->numAtoms;
4607 
4608  for ( int i = 0; i < numAtoms; ++i ) {
4609  if ( simParams->fixedAtomsOn && a[i].atomFixed ) continue;
4610  Force f = f1[i];
4611  if ( f.length2() > fmax2 ) {
4612  a[i].position += ( 0.1 * f.unit() );
4613  int hgs = a[i].hydrogenGroupSize; // 0 if not parent
// Advance i through the group's children, shifting each by the parent's step.
4614  for ( int j=1; j<hgs; ++j ) {
4615  a[++i].position += ( 0.1 * f.unit() );
4616  }
4617  }
4618  }
4619 
// Re-enforce rigid bond constraints after the displacements.
4620  patch->rattle1(0.,0,0);
4621 }
4622 
4623 // v = c * v + f
// NOTE(review): the defining line (source line 4624, presumably
// "void Sequencer::newMinimizeDirection(BigReal c) {") was lost in
// extraction -- confirm against the original Sequencer.C.
// Conjugate-gradient-style direction update for minimization: the
// velocity array doubles as the search direction, updated as v = c*v + f.
// The direction is then capped via minimize_rattle2, fixed/Drude atoms are
// re-normalized, and the patch-wide max |v|^2 is submitted through
// min_reduction for the Controller's line search.
4625  FullAtom *a = patch->atom.begin();
4626  Force *f1 = patch->f[Results::normal].begin(); // includes nbond and slow
4627  const bool fixedAtomsOn = simParams->fixedAtomsOn;
4628  const bool drudeHardWallOn = simParams->drudeHardWallOn;
4629  int numAtoms = patch->numAtoms;
4630  BigReal maxv2 = 0.;
4631 
4632  for ( int i = 0; i < numAtoms; ++i ) {
4633  a[i].velocity *= c;
4634  a[i].velocity += f1[i];
4635  if ( drudeHardWallOn && i && (0.05 < a[i].mass) && ((a[i].mass < 1.0)) ) { // drude particle
4636  a[i].velocity = a[i-1].velocity;
4637  }
4638  if ( fixedAtomsOn && a[i].atomFixed ) a[i].velocity = 0;
4639  BigReal v2 = a[i].velocity.length2();
4640  if ( v2 > maxv2 ) maxv2 = v2;
4641  }
4642 
// cap the step length of the search direction against constraints
4643  { Tensor virial; patch->minimize_rattle2( 0.1 * TIMEFACTOR / sqrt(maxv2), &virial); }
4644 
// recompute max |v|^2 after the constraint projection
4645  maxv2 = 0.;
4646  for ( int i = 0; i < numAtoms; ++i ) {
4647  if ( drudeHardWallOn && i && (0.05 < a[i].mass) && ((a[i].mass < 1.0)) ) { // drude particle
4648  a[i].velocity = a[i-1].velocity;
4649  }
4650  if ( fixedAtomsOn && a[i].atomFixed ) a[i].velocity = 0;
4651  BigReal v2 = a[i].velocity.length2();
4652  if ( v2 > maxv2 ) maxv2 = v2;
4653  }
4654 
4655  min_reduction->max(0,maxv2);
4656  min_reduction->submit();
4657 
4658  // prevent hydrogens from being left behind
4659  BigReal fmax2 = 0.01 * TIMEFACTOR * TIMEFACTOR * TIMEFACTOR * TIMEFACTOR;
4660  // int adjustCount = 0;
4661  int hgs;
4662  for ( int i = 0; i < numAtoms; i += hgs ) {
4663  hgs = a[i].hydrogenGroupSize;
4664  BigReal minChildVel = a[i].velocity.length2();
4665  if ( minChildVel < fmax2 ) continue;
4666  int adjustChildren = 1;
4667  for ( int j = i+1; j < (i+hgs); ++j ) {
4668  if ( a[j].velocity.length2() > minChildVel ) adjustChildren = 0;
4669  }
4670  if ( adjustChildren ) {
4671  // if ( hgs > 1 ) ++adjustCount;
4672  for ( int j = i+1; j < (i+hgs); ++j ) {
// NOTE(review): this guard tests the group parent's mass a[i] inside the
// child loop, so it is loop-invariant (all children copy or none do);
// a per-child a[j].mass test may have been intended -- confirm upstream.
4673  if (a[i].mass < 0.01) continue; // lone pair
4674  a[j].velocity = a[i].velocity;
4675  }
4676  }
4677  }
4678  // if (adjustCount) CkPrintf("Adjusting %d hydrogen groups\n", adjustCount);
4679 
4680 }
4681 
4682 // x = x + c * v
// NOTE(review): the defining line (source line 4683, presumably
// "void Sequencer::newMinimizePosition(BigReal c) {") was lost in
// extraction -- confirm against the original Sequencer.C.
// Minimization line-search step: move every atom by c times the current
// search direction (stored in velocity).  With the Drude hard wall on,
// Drude-particle positions are temporarily made relative to their parent
// (previous atom) around the rattle1 call so constraints act correctly.
4684  FullAtom *a = patch->atom.begin();
4685  int numAtoms = patch->numAtoms;
4686 
4687  for ( int i = 0; i < numAtoms; ++i ) {
4688  a[i].position += c * a[i].velocity;
4689  }
4690 
// convert Drude positions to parent-relative coordinates
4691  if ( simParams->drudeHardWallOn ) {
4692  for ( int i = 1; i < numAtoms; ++i ) {
4693  if ( (0.05 < a[i].mass) && ((a[i].mass < 1.0)) ) { // drude particle
4694  a[i].position -= a[i-1].position;
4695  }
4696  }
4697  }
4698 
4699  patch->rattle1(0.,0,0);
4700 
// restore Drude positions to absolute coordinates
4701  if ( simParams->drudeHardWallOn ) {
4702  for ( int i = 1; i < numAtoms; ++i ) {
4703  if ( (0.05 < a[i].mass) && ((a[i].mass < 1.0)) ) { // drude particle
4704  a[i].position += a[i-1].position;
4705  }
4706  }
4707  }
4708 }
4709 
4710 // v = 0
// NOTE(review): the defining line (source line 4711, presumably
// "void Sequencer::quenchVelocities() {") was lost in extraction.
// Zeroes every velocity in the patch; called after minimization to clear
// the bogus velocities used as search directions (see L4291 call site).
4712  FullAtom *a = patch->atom.begin();
4713  int numAtoms = patch->numAtoms;
4714 
4715  for ( int i = 0; i < numAtoms; ++i ) {
4716  a[i].velocity = 0;
4717  }
4718 }
4719 
4721 
4722  FullAtom *a = patch->atom.begin();
4723  const int numAtoms = patch->numAtoms;
4724 
4725  Vector momentum = 0;
4726  BigReal mass = 0;
4727 if ( simParams->zeroMomentumAlt ) {
4728  for ( int i = 0; i < numAtoms; ++i ) {
4729  momentum += a[i].mass * a[i].velocity;
4730  mass += 1.;
4731  }
4732 } else {
4733  for ( int i = 0; i < numAtoms; ++i ) {
4734  momentum += a[i].mass * a[i].velocity;
4735  mass += a[i].mass;
4736  }
4737 }
4738 
4739  ADD_VECTOR_OBJECT(reduction,REDUCTION_HALFSTEP_MOMENTUM,momentum);
4741 }
4742 
4743 void Sequencer::correctMomentum(int step, BigReal drifttime) {
4744 
4745  //
4746  // DJH: This test should be done in SimParameters.
4747  //
4748  if ( simParams->fixedAtomsOn )
4749  NAMD_die("Cannot zero momentum when fixed atoms are present.");
4750 
4751  // Blocking receive for the momentum correction vector.
4752  const Vector dv = broadcast->momentumCorrection.get(step);
4753 
4754  const Vector dx = dv * ( drifttime / TIMEFACTOR );
4755 
4756  FullAtom *a = patch->atom.begin();
4757  const int numAtoms = patch->numAtoms;
4758 
4759 if ( simParams->zeroMomentumAlt ) {
4760  for ( int i = 0; i < numAtoms; ++i ) {
4761  a[i].velocity += dv * a[i].recipMass;
4762  a[i].position += dx * a[i].recipMass;
4763  }
4764 } else {
4765  for ( int i = 0; i < numAtoms; ++i ) {
4766  a[i].velocity += dv;
4767  a[i].position += dx;
4768  }
4769 }
4770 
4771 }
4772 
4773 // --------- For Multigrator ---------
4774 void Sequencer::scalePositionsVelocities(const Tensor& posScale, const Tensor& velScale) {
4775  FullAtom *a = patch->atom.begin();
4776  int numAtoms = patch->numAtoms;
4777  Position origin = patch->lattice.origin();
4778  if ( simParams->fixedAtomsOn ) {
4779  NAMD_bug("Sequencer::scalePositionsVelocities, fixed atoms not implemented");
4780  }
4781  if ( simParams->useGroupPressure ) {
4782  int hgs;
4783  for ( int i = 0; i < numAtoms; i += hgs ) {
4784  hgs = a[i].hydrogenGroupSize;
4785  Position pos_cm(0.0, 0.0, 0.0);
4786  Velocity vel_cm(0.0, 0.0, 0.0);
4787  BigReal m_cm = 0.0;
4788  for (int j=0;j < hgs;++j) {
4789  m_cm += a[i+j].mass;
4790  pos_cm += a[i+j].mass*a[i+j].position;
4791  vel_cm += a[i+j].mass*a[i+j].velocity;
4792  }
4793  pos_cm /= m_cm;
4794  vel_cm /= m_cm;
4795  pos_cm -= origin;
4796  Position dpos = posScale*pos_cm;
4797  Velocity dvel = velScale*vel_cm;
4798  for (int j=0;j < hgs;++j) {
4799  a[i+j].position += dpos;
4800  a[i+j].velocity += dvel;
4801  }
4802  }
4803  } else {
4804  for ( int i = 0; i < numAtoms; i++) {
4805  a[i].position += posScale*(a[i].position-origin);
4806  a[i].velocity = velScale*a[i].velocity;
4807  }
4808  }
4809 }
4810 
4811 void Sequencer::multigratorPressure(int step, int callNumber) {
4812 // Calculate new positions, momenta, and volume using positionRescaleFactor and
4813 // velocityRescaleTensor values returned from Controller::multigratorPressureCalcScale()
// NOTE(review): source lines 4814, 4842, and 4865 were lost in extraction
// (4814 likely a guard such as "if (simParams->multigratorOn) {" matching
// the extra closing brace at the end; 4842 and 4865 likely additional
// reduction accumulations) -- confirm against the original Sequencer.C.
4815  FullAtom *a = patch->atom.begin();
4816  int numAtoms = patch->numAtoms;
4817 
4818  // Blocking receive (get) scaling factors from Controller
4819  Tensor scaleTensor = (callNumber == 1) ? broadcast->positionRescaleFactor.get(step) : broadcast->positionRescaleFactor2.get(step);
4820  Tensor velScaleTensor = (callNumber == 1) ? broadcast->velocityRescaleTensor.get(step) : broadcast->velocityRescaleTensor2.get(step);
4821  Tensor posScaleTensor = scaleTensor;
4822  posScaleTensor -= Tensor::identity();
4823  if (simParams->useGroupPressure) {
4824  velScaleTensor -= Tensor::identity();
4825  }
4826 
4827  // Scale volume
4828  patch->lattice.rescale(scaleTensor);
4829  // Scale positions and velocities
4830  scalePositionsVelocities(posScaleTensor, velScaleTensor);
4831 
4832  if (!patch->flags.doFullElectrostatics) NAMD_bug("Sequencer::multigratorPressure, doFullElectrostatics must be true");
4833 
4834  // Calculate new forces
4835  // NOTE: We should not need to migrate here since any migration should have happened in the
4836  // previous call to runComputeObjects inside the MD loop in Sequencer::integrate()
4837  const int numberOfSteps = simParams->N;
4838  const int stepsPerCycle = simParams->stepsPerCycle;
4839  runComputeObjects(0 , step<numberOfSteps, 1);
4840 
4841  reduction->item(REDUCTION_ATOM_CHECKSUM) += numAtoms;
4843 
4844  // Virials etc.
4845  Tensor virialNormal;
4846  Tensor momentumSqrSum;
4847  BigReal kineticEnergy = 0;
4848  if ( simParams->pairInteractionOn ) {
4849  if ( simParams->pairInteractionSelf ) {
4850  for ( int i = 0; i < numAtoms; ++i ) {
4851  if ( a[i].partition != 1 ) continue;
4852  kineticEnergy += a[i].mass * a[i].velocity.length2();
4853  virialNormal.outerAdd(a[i].mass, a[i].velocity, a[i].velocity);
4854  }
4855  }
4856  } else {
4857  for ( int i = 0; i < numAtoms; ++i ) {
4858  if (a[i].mass < 0.01) continue;
4859  kineticEnergy += a[i].mass * a[i].velocity.length2();
4860  virialNormal.outerAdd(a[i].mass, a[i].velocity, a[i].velocity);
4861  }
4862  }
4863  if (!simParams->useGroupPressure) momentumSqrSum = virialNormal;
4864  kineticEnergy *= 0.5;
4866  ADD_TENSOR_OBJECT(reduction, REDUCTION_VIRIAL_NORMAL, virialNormal);
4867 
4868  if ( simParams->fixedAtomsOn ) {
4869  Tensor fixVirialNormal;
4870  Tensor fixVirialNbond;
4871  Tensor fixVirialSlow;
4872  Vector fixForceNormal = 0;
4873  Vector fixForceNbond = 0;
4874  Vector fixForceSlow = 0;
4875 
4876  calcFixVirial(fixVirialNormal, fixVirialNbond, fixVirialSlow, fixForceNormal, fixForceNbond, fixForceSlow);
4877 
4878  ADD_TENSOR_OBJECT(reduction, REDUCTION_VIRIAL_NORMAL, fixVirialNormal);
4879  ADD_TENSOR_OBJECT(reduction, REDUCTION_VIRIAL_NBOND, fixVirialNbond);
4880  ADD_TENSOR_OBJECT(reduction, REDUCTION_VIRIAL_SLOW, fixVirialSlow);
4881  ADD_VECTOR_OBJECT(reduction, REDUCTION_EXT_FORCE_NORMAL, fixForceNormal);
4882  ADD_VECTOR_OBJECT(reduction, REDUCTION_EXT_FORCE_NBOND, fixForceNbond);
4883  ADD_VECTOR_OBJECT(reduction, REDUCTION_EXT_FORCE_SLOW, fixForceSlow);
4884  }
4885 
4886  // Internal virial and group momentum
4887  Tensor intVirialNormal;
4888  Tensor intVirialNormal2;
4889  Tensor intVirialNbond;
4890  Tensor intVirialSlow;
4891  int hgs;
4892  for ( int i = 0; i < numAtoms; i += hgs ) {
4893  hgs = a[i].hydrogenGroupSize;
4894  int j;
4895  BigReal m_cm = 0;
4896  Position x_cm(0,0,0);
4897  Velocity v_cm(0,0,0);
4898  for ( j = i; j < (i+hgs); ++j ) {
4899  m_cm += a[j].mass;
4900  x_cm += a[j].mass * a[j].position;
4901  v_cm += a[j].mass * a[j].velocity;
4902  }
4903  if (simParams->useGroupPressure) momentumSqrSum.outerAdd(1.0/m_cm, v_cm, v_cm);
4904  x_cm /= m_cm;
4905  v_cm /= m_cm;
4906  if (simParams->fixedAtomsOn) NAMD_bug("Sequencer::multigratorPressure, simParams->fixedAtomsOn not implemented yet");
4907  if ( simParams->pairInteractionOn ) {
4908  if ( simParams->pairInteractionSelf ) {
4909  NAMD_bug("Sequencer::multigratorPressure, this part needs to be implemented correctly");
4910  for ( j = i; j < (i+hgs); ++j ) {
4911  if ( a[j].partition != 1 ) continue;
4912  BigReal mass = a[j].mass;
4913  Vector v = a[j].velocity;
4914  Vector dv = v - v_cm;
4915  intVirialNormal2.outerAdd (mass, v, dv);
4916  Vector dx = a[j].position - x_cm;
4917  intVirialNormal.outerAdd(1.0, patch->f[Results::normal][j], dx);
4918  intVirialNbond.outerAdd(1.0, patch->f[Results::nbond][j], dx);
4919  intVirialSlow.outerAdd(1.0, patch->f[Results::slow][j], dx);
4920  }
4921  }
4922  } else {
4923  for ( j = i; j < (i+hgs); ++j ) {
4924  BigReal mass = a[j].mass;
4925  Vector v = a[j].velocity;
4926  Vector dv = v - v_cm;
4927  intVirialNormal2.outerAdd(mass, v, dv);
4928  Vector dx = a[j].position - x_cm;
4929  intVirialNormal.outerAdd(1.0, patch->f[Results::normal][j], dx);
4930  intVirialNbond.outerAdd(1.0, patch->f[Results::nbond][j], dx);
4931  intVirialSlow.outerAdd(1.0, patch->f[Results::slow][j], dx);
4932  }
4933  }
4934  }
4935 
4936  ADD_TENSOR_OBJECT(reduction, REDUCTION_INT_VIRIAL_NORMAL, intVirialNormal);
4937  ADD_TENSOR_OBJECT(reduction, REDUCTION_INT_VIRIAL_NORMAL, intVirialNormal2);
4938  ADD_TENSOR_OBJECT(reduction, REDUCTION_INT_VIRIAL_NBOND, intVirialNbond);
4939  ADD_TENSOR_OBJECT(reduction, REDUCTION_INT_VIRIAL_SLOW, intVirialSlow);
4940  ADD_TENSOR_OBJECT(reduction, REDUCTION_MOMENTUM_SQUARED, momentumSqrSum);
4941 
4942  reduction->submit();
4943  }
4944 }
4945 
4946 void Sequencer::scaleVelocities(const BigReal velScale) {
4947  FullAtom *a = patch->atom.begin();
4948  int numAtoms = patch->numAtoms;
4949  for ( int i = 0; i < numAtoms; i++) {
4950  a[i].velocity *= velScale;
4951  }
4952 }
4953 
// NOTE(review): the defining line (source line 4954, presumably
// "BigReal Sequencer::calcKineticEnergy() {") was lost in extraction --
// the name is taken from the call site in multigratorTemperature.
// Returns 0.5 * sum(m * |v|^2) over the patch; with pairInteractionOn the
// sum is restricted to partition-1 atoms, and when pairInteractionOn is
// set without pairInteractionSelf no loop runs, so the result is 0.
4955  FullAtom *a = patch->atom.begin();
4956  int numAtoms = patch->numAtoms;
4957  BigReal kineticEnergy = 0.0;
4958  if ( simParams->pairInteractionOn ) {
4959  if ( simParams->pairInteractionSelf ) {
4960  for (int i = 0; i < numAtoms; ++i ) {
4961  if ( a[i].partition != 1 ) continue;
4962  kineticEnergy += a[i].mass * a[i].velocity.length2();
4963  }
4964  }
4965  } else {
4966  for (int i = 0; i < numAtoms; ++i ) {
4967  kineticEnergy += a[i].mass * a[i].velocity.length2();
4968  }
4969  }
4970  kineticEnergy *= 0.5;
4971  return kineticEnergy;
4972 }
4973 
4974 void Sequencer::multigratorTemperature(int step, int callNumber) {
// NOTE(review): source lines 4975, 4981, and 5010 were lost in extraction
// (4975 likely an opening guard, 5010 likely the reduction submit for
// kineticEnergy referenced by the comment at the end) -- confirm against
// the original Sequencer.C.
// Multigrator thermostat step: rescale velocities by the broadcast factor,
// recompute kinetic energy, and on pressure cycles also accumulate the
// momentum-squared sum for the barostat.
4976  // Blocking receive (get) velocity scaling factor.
4977  BigReal velScale = (callNumber == 1) ? broadcast->velocityRescaleFactor.get(step) : broadcast->velocityRescaleFactor2.get(step);
4978  scaleVelocities(velScale);
4979  // Calculate new kineticEnergy
4980  BigReal kineticEnergy = calcKineticEnergy();
4982  if (callNumber == 1 && !(step % simParams->multigratorPressureFreq)) {
4983  // If this is a pressure cycle, calculate new momentum squared sum
4984  FullAtom *a = patch->atom.begin();
4985  int numAtoms = patch->numAtoms;
4986  Tensor momentumSqrSum;
4987  if (simParams->useGroupPressure) {
4988  int hgs;
4989  for ( int i = 0; i < numAtoms; i += hgs ) {
4990  hgs = a[i].hydrogenGroupSize;
4991  int j;
4992  BigReal m_cm = 0;
4993  Position x_cm(0,0,0);
4994  Velocity v_cm(0,0,0);
4995  for ( j = i; j < (i+hgs); ++j ) {
4996  m_cm += a[j].mass;
4997  x_cm += a[j].mass * a[j].position;
4998  v_cm += a[j].mass * a[j].velocity;
4999  }
5000  momentumSqrSum.outerAdd(1.0/m_cm, v_cm, v_cm);
5001  }
5002  } else {
5003  for ( int i = 0; i < numAtoms; i++) {
5004  momentumSqrSum.outerAdd(a[i].mass, a[i].velocity, a[i].velocity);
5005  }
5006  }
5007  ADD_TENSOR_OBJECT(multigratorReduction, MULTIGRATOR_REDUCTION_MOMENTUM_SQUARED, momentumSqrSum);
5008  }
5009  // Submit reductions (kineticEnergy and, if applicable, momentumSqrSum)
5011 
5012  }
5013 }
5014 // --------- End Multigrator ---------
5015 
5016 //
5017 // DJH: Calls one or more addForceToMomentum which in turn calls HomePatch
5018 // versions. We should inline to reduce the number of function calls.
5019 //
5020 void Sequencer::newtonianVelocities(BigReal stepscale, const BigReal timestep,
5021  const BigReal nbondstep,
5022  const BigReal slowstep,
5023  const int staleForces,
5024  const int doNonbonded,
5025  const int doFullElectrostatics)
5026 {
5027  NAMD_EVENT_RANGE_2(patch->flags.event_on,
5028  NamdProfileEvent::NEWTONIAN_VELOCITIES);
5029 
5030  // Deterministic velocity update, account for multigrator
5031  if (staleForces || (doNonbonded && doFullElectrostatics)) {
5032  addForceToMomentum3(stepscale*timestep, Results::normal, 0,
5033  stepscale*nbondstep, Results::nbond, staleForces,
5034  stepscale*slowstep, Results::slow, staleForces);
5035  } else {
5036  addForceToMomentum(stepscale*timestep);
5037  if (staleForces || doNonbonded)
5038  addForceToMomentum(stepscale*nbondstep, Results::nbond, staleForces);
5039  if (staleForces || doFullElectrostatics)
5040  addForceToMomentum(stepscale*slowstep, Results::slow, staleForces);
5041  }
5042 }
5043 
// NOTE(review): the defining line (source line 5044, presumably
// "void Sequencer::langevinVelocities(BigReal dt_fs)"), the langevinOn
// guard (5051), and the kbT initialization (5057-5058, presumably
// "BigReal kbT = BOLTZMANN*(simParams->langevinTemp); if (adaptTempOn)")
// were lost in extraction -- confirm against the original Sequencer.C.
5045 {
5046 // This routine is used for the BAOAB integrator,
5047 // Ornstein-Uhlenbeck exact solve for the O-part.
5048 // See B. Leimkuhler and C. Matthews, AMRX (2012)
5049 // Routine originally written by JPhillips, with fresh errors by CMatthews June2012
5050 
5052  {
5053  FullAtom *a = patch->atom.begin();
5054  int numAtoms = patch->numAtoms;
5055  Molecule *molecule = Node::Object()->molecule;
5056  BigReal dt = dt_fs * 0.001; // convert to ps
5059  {
5060  kbT = BOLTZMANN*adaptTempT;
5061  }
5062 
5063  int lesReduceTemp = simParams->lesOn && simParams->lesReduceTemp;
5064  BigReal tempFactor = lesReduceTemp ? 1.0 / simParams->lesFactor : 1.0;
5065 
// Exact OU update per atom: v <- e^{-gamma dt} v + N(0,1) * sqrt((1-e^{-2 gamma dt}) kbT/m)
5066  for ( int i = 0; i < numAtoms; ++i )
5067  {
5068  BigReal dt_gamma = dt * a[i].langevinParam;
5069  if ( ! dt_gamma ) continue;
5070 
5071  BigReal f1 = exp( -dt_gamma );
5072  BigReal f2 = sqrt( ( 1. - f1*f1 ) * kbT *
5073  ( a[i].partition ? tempFactor : 1.0 ) *
5074  a[i].recipMass );
5075  a[i].velocity *= f1;
5076  a[i].velocity += f2 * random->gaussian_vector();
5077  }
5078  }
5079 }
5080 
// NOTE(review): the defining line (source line 5081, presumably
// "void Sequencer::langevinVelocitiesBBK1(BigReal dt_fs)") and the
// langevinOn guard (5085) were lost in extraction -- confirm against the
// original Sequencer.C.
// First BBK half-kick of Langevin dynamics: deterministic friction only,
// v *= (1 - 0.5 gamma dt).  With Drude oscillators the damping is applied
// in (COM, bond) coordinates of each parent/Drude pair.
5082 {
5083  NAMD_EVENT_RANGE_2(patch->flags.event_on,
5084  NamdProfileEvent::LANGEVIN_VELOCITIES_BBK1);
5086  {
5087  FullAtom *a = patch->atom.begin();
5088  int numAtoms = patch->numAtoms;
5089  Molecule *molecule = Node::Object()->molecule;
5090  BigReal dt = dt_fs * 0.001; // convert to ps
5091  int i;
5092 
5093  if (simParams->drudeOn) {
5094  for (i = 0; i < numAtoms; i++) {
5095 
5096  if (i < numAtoms-1 &&
5097  a[i+1].mass < 1.0 && a[i+1].mass > 0.05) {
5098  //printf("*** Found Drude particle %d\n", a[i+1].id);
5099  // i+1 is a Drude particle with parent i
5100 
5101  // convert from Cartesian coordinates to (COM,bond) coordinates
5102  BigReal m = a[i+1].mass / (a[i].mass + a[i+1].mass); // mass ratio
5103  Vector v_bnd = a[i+1].velocity - a[i].velocity; // vel of bond
5104  Vector v_com = a[i].velocity + m * v_bnd; // vel of COM
5105  BigReal dt_gamma;
5106 
5107  // use Langevin damping factor i for v_com
5108  dt_gamma = dt * a[i].langevinParam;
5109  if (dt_gamma != 0.0) {
5110  v_com *= ( 1. - 0.5 * dt_gamma );
5111  }
5112 
5113  // use Langevin damping factor i+1 for v_bnd
5114  dt_gamma = dt * a[i+1].langevinParam;
5115  if (dt_gamma != 0.0) {
5116  v_bnd *= ( 1. - 0.5 * dt_gamma );
5117  }
5118 
5119  // convert back
5120  a[i].velocity = v_com - m * v_bnd;
5121  a[i+1].velocity = v_bnd + a[i].velocity;
5122 
5123  i++; // +1 from loop, we've updated both particles
5124  }
5125  else {
5126  BigReal dt_gamma = dt * a[i].langevinParam;
5127  if ( ! dt_gamma ) continue;
5128 
5129  a[i].velocity *= ( 1. - 0.5 * dt_gamma );
5130  }
5131 
5132  } // end for
5133  } // end if drudeOn
5134  else {
5135 
5136  //
5137  // DJH: The conditional inside loop prevents vectorization and doesn't
5138  // avoid much work since addition and multiplication are cheap.
5139  //
5140  for ( i = 0; i < numAtoms; ++i )
5141  {
5142  BigReal dt_gamma = dt * a[i].langevinParam;
5143  if ( ! dt_gamma ) continue;
5144 
5145  a[i].velocity *= ( 1. - 0.5 * dt_gamma );
5146  }
5147 
5148  } // end else
5149 
5150  } // end if langevinOn
5151 }
5152 
5153 
// NOTE(review): the defining line (source line 5154, presumably
// "void Sequencer::langevinVelocitiesBBK2(BigReal dt_fs)"), the langevinOn
// guard (5158), and the kbT initialization (5172-5173) were lost in
// extraction -- confirm against the original Sequencer.C.
// Second BBK half-kick: adds the stochastic force and divides by
// (1 + 0.5 gamma dt); rattle1 is applied first to conserve momentum when
// per-atom gammas differ.  Drude pairs are handled in (COM, bond)
// coordinates with the bond DOF thermostated at drudeTemp.
5155 {
5156  NAMD_EVENT_RANGE_2(patch->flags.event_on,
5157  NamdProfileEvent::LANGEVIN_VELOCITIES_BBK2);
5159  {
5160  //
5161  // DJH: This call is expensive. Avoid calling when gammas don't differ.
5162  // Set flag in SimParameters and make this call conditional.
5163  //
5164  TIMER_START(patch->timerSet, RATTLE1);
5165  rattle1(dt_fs,1); // conserve momentum if gammas differ
5166  TIMER_STOP(patch->timerSet, RATTLE1);
5167 
5168  FullAtom *a = patch->atom.begin();
5169  int numAtoms = patch->numAtoms;
5170  Molecule *molecule = Node::Object()->molecule;
5171  BigReal dt = dt_fs * 0.001; // convert to ps
5174  {
5175  kbT = BOLTZMANN*adaptTempT;
5176  }
5177  int lesReduceTemp = simParams->lesOn && simParams->lesReduceTemp;
5178  BigReal tempFactor = lesReduceTemp ? 1.0 / simParams->lesFactor : 1.0;
5179  int i;
5180 
5181  if (simParams->drudeOn) {
5182  BigReal kbT_bnd = BOLTZMANN*(simParams->drudeTemp); // drude bond Temp
5183 
5184  for (i = 0; i < numAtoms; i++) {
5185 
5186  if (i < numAtoms-1 &&
5187  a[i+1].mass < 1.0 && a[i+1].mass > 0.05) {
5188  //printf("*** Found Drude particle %d\n", a[i+1].id);
5189  // i+1 is a Drude particle with parent i
5190 
5191  // convert from Cartesian coordinates to (COM,bond) coordinates
5192  BigReal m = a[i+1].mass / (a[i].mass + a[i+1].mass); // mass ratio
5193  Vector v_bnd = a[i+1].velocity - a[i].velocity; // vel of bond
5194  Vector v_com = a[i].velocity + m * v_bnd; // vel of COM
5195  BigReal dt_gamma;
5196 
5197  // use Langevin damping factor i for v_com
5198  dt_gamma = dt * a[i].langevinParam;
5199  if (dt_gamma != 0.0) {
5200  BigReal mass = a[i].mass + a[i+1].mass;
5201  v_com += random->gaussian_vector() *
5202  sqrt( 2 * dt_gamma * kbT *
5203  ( a[i].partition ? tempFactor : 1.0 ) / mass );
5204  v_com /= ( 1. + 0.5 * dt_gamma );
5205  }
5206 
5207  // use Langevin damping factor i+1 for v_bnd
5208  dt_gamma = dt * a[i+1].langevinParam;
5209  if (dt_gamma != 0.0) {
5210  BigReal mass = a[i+1].mass * (1. - m);
5211  v_bnd += random->gaussian_vector() *
5212  sqrt( 2 * dt_gamma * kbT_bnd *
5213  ( a[i+1].partition ? tempFactor : 1.0 ) / mass );
5214  v_bnd /= ( 1. + 0.5 * dt_gamma );
5215  }
5216 
5217  // convert back
5218  a[i].velocity = v_com - m * v_bnd;
5219  a[i+1].velocity = v_bnd + a[i].velocity;
5220 
5221  i++; // +1 from loop, we've updated both particles
5222  }
5223  else {
5224  BigReal dt_gamma = dt * a[i].langevinParam;
5225  if ( ! dt_gamma ) continue;
5226 
5227  a[i].velocity += random->gaussian_vector() *
5228  sqrt( 2 * dt_gamma * kbT *
5229  ( a[i].partition ? tempFactor : 1.0 ) * a[i].recipMass );
5230  a[i].velocity /= ( 1. + 0.5 * dt_gamma );
5231  }
5232 
5233  } // end for
5234  } // end if drudeOn
5235  else {
5236 
5237  //
5238  // DJH: For case using same gamma (the Langevin parameter),
5239  // no partitions (e.g. FEP), and no adaptive tempering (adaptTempMD),
5240  // we can precompute constants. Then by lifting the RNG from the
5241  // loop (filling up an array of random numbers), we can vectorize
5242  // loop and simplify arithmetic to just addition and multiplication.
5243  //
5244  for ( i = 0; i < numAtoms; ++i )
5245  {
5246  BigReal dt_gamma = dt * a[i].langevinParam;
5247  if ( ! dt_gamma ) continue;
5248 
5249  a[i].velocity += random->gaussian_vector() *
5250  sqrt( 2 * dt_gamma * kbT *
5251  ( a[i].partition ? tempFactor : 1.0 ) * a[i].recipMass );
5252  a[i].velocity /= ( 1. + 0.5 * dt_gamma );
5253  }
5254 
5255  } // end else
5256 
5257  } // end if langevinOn
5258 }
5259 
5260 
// NOTE(review): several source lines were lost in extraction: 5261 (the
// defining line, presumably "void Sequencer::berendsenPressure(int step)"),
// 5264/5267 (likely the berendsenPressure_count increment/reset), 5321
// (likely the count reset in the else branch), and 5281/5299/5312 (each
// followed by "a[..].fixedPosition,a[..].transform);", so presumably calls
// like "a[j].position = patch->lattice.reverse_transform(").  Confirm
// against the original Sequencer.C.
// Berendsen barostat: every berendsenPressureFreq counts, rescale the
// lattice and atom positions by the broadcast factor; with group pressure
// whole hydrogen groups are translated rigidly by their center-of-mass
// shift, and fixed atoms/groups are pinned to their fixed positions.
5262 {
5263  if ( simParams->berendsenPressureOn ) {
5265  const int freq = simParams->berendsenPressureFreq;
5266  if ( ! (berendsenPressure_count % freq ) ) {
5268  FullAtom *a = patch->atom.begin();
5269  int numAtoms = patch->numAtoms;
5270  // Blocking receive for the updated lattice scaling factor.
5271  Tensor factor = broadcast->positionRescaleFactor.get(step);
5272  patch->lattice.rescale(factor);
5273  if ( simParams->useGroupPressure )
5274  {
5275  int hgs;
5276  for ( int i = 0; i < numAtoms; i += hgs ) {
5277  int j;
5278  hgs = a[i].hydrogenGroupSize;
5279  if ( simParams->fixedAtomsOn && a[i].groupFixed ) {
5280  for ( j = i; j < (i+hgs); ++j ) {
5282  a[j].fixedPosition,a[j].transform);
5283  }
5284  continue;
5285  }
5286  BigReal m_cm = 0;
5287  Position x_cm(0,0,0);
5288  for ( j = i; j < (i+hgs); ++j ) {
5289  if ( simParams->fixedAtomsOn && a[j].atomFixed ) continue;
5290  m_cm += a[j].mass;
5291  x_cm += a[j].mass * a[j].position;
5292  }
5293  x_cm /= m_cm;
5294  Position new_x_cm = x_cm;
5295  patch->lattice.rescale(new_x_cm,factor);
5296  Position delta_x_cm = new_x_cm - x_cm;
5297  for ( j = i; j < (i+hgs); ++j ) {
5298  if ( simParams->fixedAtomsOn && a[j].atomFixed ) {
5300  a[j].fixedPosition,a[j].transform);
5301  continue;
5302  }
5303  a[j].position += delta_x_cm;
5304  }
5305  }
5306  }
5307  else
5308  {
5309  for ( int i = 0; i < numAtoms; ++i )
5310  {
5311  if ( simParams->fixedAtomsOn && a[i].atomFixed ) {
5313  a[i].fixedPosition,a[i].transform);
5314  continue;
5315  }
5316  patch->lattice.rescale(a[i].position,factor);
5317  }
5318  }
5319  }
5320  } else {
5322  }
5323 }
5324 
// NOTE(review): source line 5325 (the defining line, presumably
// "void Sequencer::langevinPiston(int step)") and lines 5350/5375/5390
// (each followed by "a[..].fixedPosition,a[..].transform);", presumably
// calls like "a[j].position = patch->lattice.reverse_transform(") were
// lost in extraction -- confirm against the original Sequencer.C.
// Langevin-piston barostat update, applied mid-cycle (every slowFreq
// steps): rescales the lattice, positions, and velocities by the broadcast
// factor.  Group pressure translates hydrogen groups rigidly and adjusts
// their center-of-mass velocity; fixed and pressure-excluded atoms are
// skipped.
5326 {
5327  if ( simParams->langevinPistonOn && ! ( (step-1-slowFreq/2) % slowFreq ) )
5328  {
5329  //
5330  // DJH: Loops below simplify if we lift out special cases of fixed atoms
5331  // and pressure excluded atoms and make them their own branch.
5332  //
5333  FullAtom *a = patch->atom.begin();
5334  int numAtoms = patch->numAtoms;
5335  // Blocking receive for the updated lattice scaling factor.
5336  Tensor factor = broadcast->positionRescaleFactor.get(step);
5337  TIMER_START(patch->timerSet, PISTON);
5338  // JCP FIX THIS!!!
5339  Vector velFactor(1/factor.xx,1/factor.yy,1/factor.zz);
5340  patch->lattice.rescale(factor);
5341  Molecule *mol = Node::Object()->molecule;
5342  if ( simParams->useGroupPressure )
5343  {
5344  int hgs;
5345  for ( int i = 0; i < numAtoms; i += hgs ) {
5346  int j;
5347  hgs = a[i].hydrogenGroupSize;
5348  if ( simParams->fixedAtomsOn && a[i].groupFixed ) {
5349  for ( j = i; j < (i+hgs); ++j ) {
5351  a[j].fixedPosition,a[j].transform);
5352  }
5353  continue;
5354  }
5355  BigReal m_cm = 0;
5356  Position x_cm(0,0,0);
5357  Velocity v_cm(0,0,0);
5358  for ( j = i; j < (i+hgs); ++j ) {
5359  if ( simParams->fixedAtomsOn && a[j].atomFixed ) continue;
5360  m_cm += a[j].mass;
5361  x_cm += a[j].mass * a[j].position;
5362  v_cm += a[j].mass * a[j].velocity;
5363  }
5364  x_cm /= m_cm;
5365  Position new_x_cm = x_cm;
5366  patch->lattice.rescale(new_x_cm,factor);
5367  Position delta_x_cm = new_x_cm - x_cm;
5368  v_cm /= m_cm;
5369  Velocity delta_v_cm;
5370  delta_v_cm.x = ( velFactor.x - 1 ) * v_cm.x;
5371  delta_v_cm.y = ( velFactor.y - 1 ) * v_cm.y;
5372  delta_v_cm.z = ( velFactor.z - 1 ) * v_cm.z;
5373  for ( j = i; j < (i+hgs); ++j ) {
5374  if ( simParams->fixedAtomsOn && a[j].atomFixed ) {
5376  a[j].fixedPosition,a[j].transform);
5377  continue;
5378  }
5379  if ( mol->is_atom_exPressure(a[j].id) ) continue;
5380  a[j].position += delta_x_cm;
5381  a[j].velocity += delta_v_cm;
5382  }
5383  }
5384  }
5385  else
5386  {
5387  for ( int i = 0; i < numAtoms; ++i )
5388  {
5389  if ( simParams->fixedAtomsOn && a[i].atomFixed ) {
5391  a[i].fixedPosition,a[i].transform);
5392  continue;
5393  }
5394  if ( mol->is_atom_exPressure(a[i].id) ) continue;
5395  patch->lattice.rescale(a[i].position,factor);
5396  a[i].velocity.x *= velFactor.x;
5397  a[i].velocity.y *= velFactor.y;
5398  a[i].velocity.z *= velFactor.z;
5399  }
5400  }
5401  TIMER_STOP(patch->timerSet, PISTON);
5402  }
5403 }
5404 
// NOTE(review): source lines 5405 (the defining line, presumably
// "void Sequencer::rescaleVelocities(int step)"), 5411, and 5419 (likely
// the rescaleVelocities_numTemps increment and reset) were lost in
// extraction -- confirm against the original Sequencer.C.
// Equilibration-style velocity rescaling: every rescaleFreq accumulated
// temperatures, multiply all velocities by the Controller-broadcast factor.
5406 {
5407  const int rescaleFreq = simParams->rescaleFreq;
5408  if ( rescaleFreq > 0 ) {
5409  FullAtom *a = patch->atom.begin();
5410  int numAtoms = patch->numAtoms;
5412  if ( rescaleVelocities_numTemps == rescaleFreq ) {
5413  // Blocking receive for the velocity scaling factor.
5414  BigReal factor = broadcast->velocityRescaleFactor.get(step);
5415  for ( int i = 0; i < numAtoms; ++i )
5416  {
5417  a[i].velocity *= factor;
5418  }
5420  }
5421  }
5422 }
5423 
5424 void Sequencer::rescaleaccelMD (int step, int doNonbonded, int doFullElectrostatics)
5425 {
5426  if (!simParams->accelMDOn) return;
5427  if ((step < simParams->accelMDFirstStep) || ( simParams->accelMDLastStep >0 && step > simParams->accelMDLastStep)) return;
5428 
5429  // Blocking receive for the Accelerated MD scaling factors.
5430  Vector accelMDfactor = broadcast->accelMDRescaleFactor.get(step);
5431  const BigReal factor_dihe = accelMDfactor[0];
5432  const BigReal factor_tot = accelMDfactor[1];
5433  const int numAtoms = patch->numAtoms;
5434 
5435  if (simParams->accelMDdihe && factor_tot <1 )
5436  NAMD_die("accelMD broadcasting error!\n");
5437  if (!simParams->accelMDdihe && !simParams->accelMDdual && factor_dihe <1 )
5438  NAMD_die("accelMD broadcasting error!\n");
5439 
5440  if (simParams->accelMDdihe && factor_dihe < 1) {
5441  for (int i = 0; i < numAtoms; ++i)
5442  if (patch->f[Results::amdf][i][0] || patch->f[Results::amdf][i][1] || patch->f[Results::amdf][i][2])
5443  patch->f[Results::normal][i] += patch->f[Results::amdf][i]*(factor_dihe - 1);
5444  }
5445 
5446  if ( !simParams->accelMDdihe && factor_tot < 1) {
5447  for (int i = 0; i < numAtoms; ++i)
5448  patch->f[Results::normal][i] *= factor_tot;
5449  if (doNonbonded) {
5450  for (int i = 0; i < numAtoms; ++i)
5451  patch->f[Results::nbond][i] *= factor_tot;
5452  }
5453  if (doFullElectrostatics) {
5454  for (int i = 0; i < numAtoms; ++i)
5455  patch->f[Results::slow][i] *= factor_tot;
5456  }
5457  }
5458 
5459  if (simParams->accelMDdual && factor_dihe < 1) {
5460  for (int i = 0; i < numAtoms; ++i)
5461  if (patch->f[Results::amdf][i][0] || patch->f[Results::amdf][i][1] || patch->f[Results::amdf][i][2])
5462  patch->f[Results::normal][i] += patch->f[Results::amdf][i]*(factor_dihe - factor_tot);
5463  }
5464 
5465 }
5466 
// NOTE(review): this routine is heavily garbled by extraction -- source
// lines 5467 (the defining line, presumably
// "void Sequencer::adaptTempUpdate(int step)"), 5472, 5474, and 5480
// (the second half of the range test, the langevin-temperature restore
// statement, and the blocking broadcast receive into adaptTempT) were
// lost.  Confirm against the original Sequencer.C before editing.
5468 {
5469  //check if adaptive tempering is enabled and in the right timestep range
5470  if (!simParams->adaptTempOn) return;
5471  if ( (step < simParams->adaptTempFirstStep ) ||
5473  if (simParams->langevinOn) // restore langevin temperature
5475  return;
5476  }
5477  // Get Updated Temperature
5478  if ( !(step % simParams->adaptTempFreq ) && (step > simParams->firstTimestep ))
5480  // Blocking receive for the updated adaptive tempering temperature.
5481 }
5482 
5483 void Sequencer::reassignVelocities(BigReal timestep, int step)
5484 {
5485  const int reassignFreq = simParams->reassignFreq;
5486  if ( ( reassignFreq > 0 ) && ! ( step % reassignFreq ) ) {
5487  FullAtom *a = patch->atom.begin();
5488  int numAtoms = patch->numAtoms;
5489  BigReal newTemp = simParams->reassignTemp;
5490  newTemp += ( step / reassignFreq ) * simParams->reassignIncr;
5491  if ( simParams->reassignIncr > 0.0 ) {
5492  if ( newTemp > simParams->reassignHold && simParams->reassignHold > 0.0 )
5493  newTemp = simParams->reassignHold;
5494  } else {
5495  if ( newTemp < simParams->reassignHold )
5496  newTemp = simParams->reassignHold;
5497  }
5498  BigReal kbT = BOLTZMANN * newTemp;
5499 
5500  int lesReduceTemp = simParams->lesOn && simParams->lesReduceTemp;
5501  BigReal tempFactor = lesReduceTemp ? 1.0 / simParams->lesFactor : 1.0;
5502 
5503  for ( int i = 0; i < numAtoms; ++i )
5504  {
5505  a[i].velocity = ( ( simParams->fixedAtomsOn &&
5506  a[i].atomFixed && a[i].mass > 0.) ? Vector(0,0,0) :
5507  sqrt(kbT * (a[i].partition ? tempFactor : 1.0) * a[i].recipMass) *
5508  random->gaussian_vector() );
5509  }
5510  } else {
5511  NAMD_bug("Sequencer::reassignVelocities called improperly!");
5512  }
5513 }
5514 
// NOTE(review): the defining line (source line 5515, presumably
// "void Sequencer::reinitVelocities(void)") was lost in extraction.
// Draw fresh Maxwell-Boltzmann velocities at initialTemp for every atom;
// fixed or massless atoms get zero, and each Drude particle copies its
// parent's freshly drawn velocity.
5516 {
5517  FullAtom *a = patch->atom.begin();
5518  int numAtoms = patch->numAtoms;
5519  BigReal newTemp = simParams->initialTemp;
5520  BigReal kbT = BOLTZMANN * newTemp;
5521 
5522  int lesReduceTemp = simParams->lesOn && simParams->lesReduceTemp;
5523  BigReal tempFactor = lesReduceTemp ? 1.0 / simParams->lesFactor : 1.0;
5524 
5525  for ( int i = 0; i < numAtoms; ++i )
5526  {
5527  a[i].velocity = ( ( (simParams->fixedAtomsOn && a[i].atomFixed) ||
5528  a[i].mass <= 0.) ? Vector(0,0,0) :
5529  sqrt(kbT * (a[i].partition ? tempFactor : 1.0) * a[i].recipMass) *
5530  random->gaussian_vector() );
5531  if ( simParams->drudeOn && i+1 < numAtoms && a[i+1].mass < 1.0 && a[i+1].mass > 0.05 ) {
5532  a[i+1].velocity = a[i].velocity; // zero is good enough
5533  ++i;
5534  }
5535  }
5536 }
5537 
// NOTE(review): the defining line (source line 5538, presumably
// "void Sequencer::rescaleVelocitiesByFactor(BigReal factor)") was lost
// in extraction.  Multiplies every velocity in the patch by factor.
5539 {
5540  FullAtom *a = patch->atom.begin();
5541  int numAtoms = patch->numAtoms;
5542  for ( int i = 0; i < numAtoms; ++i )
5543  {
5544  a[i].velocity *= factor;
5545  }
5546 }
5547 
// NOTE(review): the defining line (source line 5548, presumably
// "void Sequencer::reloadCharges()") was lost in extraction.
// Restore every atom's charge from the authoritative per-atom charge
// stored in the Molecule object.
5549 {
5550  FullAtom *a = patch->atom.begin();
5551  int numAtoms = patch->numAtoms;
5552  Molecule *molecule = Node::Object()->molecule;
5553  for ( int i = 0; i < numAtoms; ++i )
5554  {
5555  a[i].charge = molecule->atomcharge(a[i].id);
5556  }
5557 }
5558 
5559 // REST2 solute charge scaling
// NOTE(review): the defining line (source line 5560, presumably
// "void Sequencer::rescaleSoluteCharges(BigReal factor)") was lost in
// extraction.  Scales the charge of each marked solute atom by
// sqrt(factor), starting from the original charge held by Molecule, and
// mirrors the new value into the SOA charge array when SOA integration
// is active.
5561 {
5562  FullAtom *a = patch->atom.begin();
5563  int numAtoms = patch->numAtoms;
5564  Molecule *molecule = Node::Object()->molecule;
5565  BigReal sqrt_factor = sqrt(factor);
5566  // apply scaling to the original charge (stored in molecule)
5567  // of just the marked solute atoms
5568  for ( int i = 0; i < numAtoms; ++i ) {
5569  if (molecule->get_ss_type(a[i].id)) {
5570  a[i].charge = sqrt_factor * molecule->atomcharge(a[i].id);
5571  if (simParams->SOAintegrateOn) patch->patchDataSOA.charge[i] = a[i].charge;
5572  }
5573  }
5574 }
5575 
// NOTE(review): the defining line (source line 5576, presumably
// "void Sequencer::tcoupleVelocities(BigReal dt_fs, int step)") was lost
// in extraction.  Temperature-coupling update: scales each velocity by
// exp(coefficient * dt * langevinParam) using the Controller-broadcast
// coupling coefficient.
5577 {
5578  if ( simParams->tCoupleOn )
5579  {
5580  FullAtom *a = patch->atom.begin();
5581  int numAtoms = patch->numAtoms;
5582  // Blocking receive for the temperature coupling coefficient.
5583  BigReal coefficient = broadcast->tcoupleCoefficient.get(step);
5584  Molecule *molecule = Node::Object()->molecule;
5585  BigReal dt = dt_fs * 0.001; // convert to ps
5586  coefficient *= dt;
5587  for ( int i = 0; i < numAtoms; ++i )
5588  {
5589  BigReal f1 = exp( coefficient * a[i].langevinParam );
5590  a[i].velocity *= f1;
5591  }
5592  }
5593 }
5594 
// NOTE(review): extraction lost this function's signature and the opening
// of an inner conditional (Doxygen lines ~5595-5602); the brace at 5612
// closes that lost `if`. The visible body scales all velocities by the
// stochastic-rescaling coefficient received (blocking) from the Controller
// broadcast and resets stochRescale_count. Reconcile against upstream
// before editing.
5600 {
5603  FullAtom *a = patch->atom.begin();
5604  int numAtoms = patch->numAtoms;
5605  // Blocking receive for the temperature coupling coefficient.
5606  BigReal velrescaling = broadcast->stochRescaleCoefficient.get(step);
5607  DebugM(4, "stochastically rescaling velocities at step " << step << " by " << velrescaling << "\n");
5608  for ( int i = 0; i < numAtoms; ++i ) {
5609  a[i].velocity *= velrescaling;
5610  }
5611  stochRescale_count = 0;
5612  }
5613 }
5614 
// Ask the patch to snapshot the force array selected by `ftag` into its
// saved-force storage, so it can be reapplied later (see the useSaved
// arguments of addForceToMomentum / addForceToMomentum3 below).
5615 void Sequencer::saveForce(const int ftag)
5616 {
5617  patch->saveForce(ftag);
5618 }
5619 
5620 //
5621 // DJH: Need to change division by TIMEFACTOR into multiplication by
5622 // reciprocal of TIMEFACTOR. Done several times for each iteration of
5623 // the integrate() loop.
5624 //
5625 
// NOTE(review): the first line of this signature (Doxygen 5626, presumably
// `void Sequencer::addForceToMomentum(`) was lost in extraction.
// Half-kick: adds the force bucket `ftag` (current forces, or the saved
// copy when useSaved is set) scaled by timestep/TIMEFACTOR into the atom
// momenta, delegating the per-atom work to the patch.
5627  BigReal timestep, const int ftag, const int useSaved
5628  ) {
5629  NAMD_EVENT_RANGE_2(patch->flags.event_on,
5630  NamdProfileEvent::ADD_FORCE_TO_MOMENTUM);
5631 #if CMK_BLUEGENEL
5632  CmiNetworkProgressAfter (0);
5633 #endif
5634  const BigReal dt = timestep / TIMEFACTOR;
5635  FullAtom *atom_arr = patch->atom.begin();
5636  ForceList *f_use = (useSaved ? patch->f_saved : patch->f);
5637  const Force *force_arr = f_use[ftag].const_begin();
5638  patch->addForceToMomentum(atom_arr, force_arr, dt, patch->numAtoms);
5639 }
5640 
// NOTE(review): the first line of this signature (Doxygen 5641, presumably
// `void Sequencer::addForceToMomentum3(`) was lost in extraction.
// Fused triple kick for multiple timestepping: applies three force buckets,
// each with its own timestep and its own current/saved selection, in a
// single pass over the atoms via patch->addForceToMomentum3.
5642  const BigReal timestep1, const int ftag1, const int useSaved1,
5643  const BigReal timestep2, const int ftag2, const int useSaved2,
5644  const BigReal timestep3, const int ftag3, const int useSaved3
5645  ) {
5646  NAMD_EVENT_RANGE_2(patch->flags.event_on,
5647  NamdProfileEvent::ADD_FORCE_TO_MOMENTUM);
5648 #if CMK_BLUEGENEL
5649  CmiNetworkProgressAfter (0);
5650 #endif
5651  const BigReal dt1 = timestep1 / TIMEFACTOR;
5652  const BigReal dt2 = timestep2 / TIMEFACTOR;
5653  const BigReal dt3 = timestep3 / TIMEFACTOR;
5654  ForceList *f_use1 = (useSaved1 ? patch->f_saved : patch->f);
5655  ForceList *f_use2 = (useSaved2 ? patch->f_saved : patch->f);
5656  ForceList *f_use3 = (useSaved3 ? patch->f_saved : patch->f);
5657  FullAtom *atom_arr = patch->atom.begin();
5658  const Force *force_arr1 = f_use1[ftag1].const_begin();
5659  const Force *force_arr2 = f_use2[ftag2].const_begin();
5660  const Force *force_arr3 = f_use3[ftag3].const_begin();
5661  patch->addForceToMomentum3 (atom_arr, force_arr1, force_arr2, force_arr3,
5662  dt1, dt2, dt3, patch->numAtoms);
5663 }
5664 
// NOTE(review): signature line (Doxygen 5665, presumably
// `void Sequencer::addVelocityToPosition(BigReal timestep)`) lost in
// extraction. Drift step: advances every atom position by
// velocity * timestep/TIMEFACTOR, delegating to the patch.
5666 {
5667  NAMD_EVENT_RANGE_2(patch->flags.event_on,
5668  NamdProfileEvent::ADD_VELOCITY_TO_POSITION);
5669 #if CMK_BLUEGENEL
5670  CmiNetworkProgressAfter (0);
5671 #endif
5672  const BigReal dt = timestep / TIMEFACTOR;
5673  FullAtom *atom_arr = patch->atom.begin();
5674  patch->addVelocityToPosition(atom_arr, dt, patch->numAtoms);
5675 }
5676 
// Enforce the hard-wall constraint on Drude bond lengths via the patch;
// when `pressure` is nonzero the constraint virial is collected and added
// to the normal-virial reduction. On constraint failure an error is logged
// and the simulation is terminated.
// NOTE(review): Doxygen line 5685 (between the error message and
// terminate()) was lost in extraction — confirm the missing statement
// against the upstream source.
5677 void Sequencer::hardWallDrude(BigReal dt, int pressure)
5678 {
5679  if ( simParams->drudeHardWallOn ) {
5680  Tensor virial;
5681  Tensor *vp = ( pressure ? &virial : 0 );
5682  if ( patch->hardWallDrude(dt, vp, pressureProfileReduction) ) {
5683  iout << iERROR << "Constraint failure in HardWallDrude(); "
5684  << "simulation may become unstable.\n" << endi;
5686  terminate();
5687  }
5688  ADD_TENSOR_OBJECT(reduction,REDUCTION_VIRIAL_NORMAL,virial);
5689  }
5690 }
5691 
// First RATTLE stage: apply rigid-bond constraints through the patch after
// the position update; when `pressure` is nonzero the constraint virial is
// collected and added to the normal-virial reduction. Constraint failure
// logs an error and terminates the run. The `#if 0` blocks are debugging
// dumps of the virial, positions, velocities, and forces.
// NOTE(review): Doxygen line 5701 (between the error output and
// terminate()) was lost in extraction — confirm upstream.
5692 void Sequencer::rattle1(BigReal dt, int pressure)
5693 {
5694  NAMD_EVENT_RANGE_2(patch->flags.event_on, NamdProfileEvent::RATTLE1);
5695  if ( simParams->rigidBonds != RIGID_NONE ) {
5696  Tensor virial;
5697  Tensor *vp = ( pressure ? &virial : 0 );
5698  if ( patch->rattle1(dt, vp, pressureProfileReduction) ) {
5699  iout << iERROR <<
5700  "Constraint failure; simulation has become unstable.\n" << endi;
5702  terminate();
5703  }
5704 #if 0
5705  printf("virial = %g %g %g %g %g %g %g %g %g\n",
5706  virial.xx, virial.xy, virial.xz,
5707  virial.yx, virial.yy, virial.yz,
5708  virial.zx, virial.zy, virial.zz);
5709 #endif
5710  ADD_TENSOR_OBJECT(reduction,REDUCTION_VIRIAL_NORMAL,virial);
5711 #if 0
5712  {
5713  const FullAtom *a = patch->atom.const_begin();
5714  for (int n=0; n < patch->numAtoms; n++) {
5715  printf("pos[%d] = %g %g %g\n", n,
5716  a[n].position.x, a[n].position.y, a[n].position.z);
5717  }
5718  for (int n=0; n < patch->numAtoms; n++) {
5719  printf("vel[%d] = %g %g %g\n", n,
5720  a[n].velocity.x, a[n].velocity.y, a[n].velocity.z);
5721  }
5722  if (pressure) {
5723  for (int n=0; n < patch->numAtoms; n++) {
5724  printf("force[%d] = %g %g %g\n", n,
5725  patch->f[Results::normal][n].x,
5726  patch->f[Results::normal][n].y,
5727  patch->f[Results::normal][n].z);
5728  }
5729  }
5730  }
5731 #endif
5732  }
5733 }
5734 
5735 // void Sequencer::rattle2(BigReal dt, int step)
5736 // {
5737 // if ( simParams->rigidBonds != RIGID_NONE ) {
5738 // Tensor virial;
5739 // patch->rattle2(dt, &virial);
5740 // ADD_TENSOR_OBJECT(reduction,REDUCTION_VIRIAL_NORMAL,virial);
5741 // // we need to add to alt and int virial because not included in forces
5742 // #ifdef ALTVIRIAL
5743 // ADD_TENSOR_OBJECT(reduction,REDUCTION_ALT_VIRIAL_NORMAL,virial);
5744 // #endif
5745 // ADD_TENSOR_OBJECT(reduction,REDUCTION_INT_VIRIAL_NORMAL,virial);
5746 // }
5747 // }
5748 
// NOTE(review): the signature line (Doxygen 5749, presumably
// `void Sequencer::maximumMove(BigReal timestep)`) and line 5788 (just
// before terminate()) were lost in extraction — confirm upstream.
// When simParams->maximumMove is set, speeds are clamped so no atom moves
// more than maximumMove per step. Otherwise, any atom moving farther than
// the cutoff per step is treated as fatal: each offender is reported
// (velocities printed in PDB units via PDBVELFACTOR) and the run is
// terminated.
5750 {
5751  NAMD_EVENT_RANGE_2(patch->flags.event_on, NamdProfileEvent::MAXIMUM_MOVE);
5752 
5753  FullAtom *a = patch->atom.begin();
5754  int numAtoms = patch->numAtoms;
5755  if ( simParams->maximumMove ) {
5756  const BigReal dt = timestep / TIMEFACTOR;
5757  const BigReal maxvel = simParams->maximumMove / dt;
5758  const BigReal maxvel2 = maxvel * maxvel;
5759  for ( int i=0; i<numAtoms; ++i ) {
5760  if ( a[i].velocity.length2() > maxvel2 ) {
5761  a[i].velocity *= ( maxvel / a[i].velocity.length() );
5762  }
5763  }
5764  } else {
5765  const BigReal dt = timestep / TIMEFACTOR;
5766  const BigReal maxvel = simParams->cutoff / dt;
5767  const BigReal maxvel2 = maxvel * maxvel;
5768  int killme = 0;
5769  for ( int i=0; i<numAtoms; ++i ) {
5770  killme = killme || ( a[i].velocity.length2() > maxvel2 );
5771  }
5772  if ( killme ) {
5773  killme = 0;
5774  for ( int i=0; i<numAtoms; ++i ) {
5775  if ( a[i].velocity.length2() > maxvel2 ) {
5776  ++killme;
5777  iout << iERROR << "Atom " << (a[i].id + 1) << " velocity is "
5778  << ( PDBVELFACTOR * a[i].velocity ) << " (limit is "
5779  << ( PDBVELFACTOR * maxvel ) << ", atom "
5780  << i << " of " << numAtoms << " on patch "
5781  << patch->patchID << " pe " << CkMyPe() << ")\n" << endi;
5782  }
5783  }
5784  iout << iERROR <<
5785  "Atoms moving too fast; simulation has become unstable ("
5786  << killme << " atoms on patch " << patch->patchID
5787  << " pe " << CkMyPe() << ").\n" << endi;
5789  terminate();
5790  }
5791  }
5792 }
5793 
// NOTE(review): signature line (Doxygen 5794) lost in extraction — confirm
// upstream. Given the minimizeOn guard, this zeroes every atom velocity in
// the patch (velocity quench during energy minimization).
5795 {
5796  if ( simParams->minimizeOn ) {
5797  FullAtom *a = patch->atom.begin();
5798  int numAtoms = patch->numAtoms;
5799  for ( int i=0; i<numAtoms; ++i ) {
5800  a[i].velocity = 0.;
5801  }
5802  }
5803 }
5804 
// NOTE(review): extraction lost this function's signature (Doxygen ~5805)
// and several interior lines (5820, 5840, 5844, 5852, 5906, 5926, 5952 —
// visible as gaps in the fused line numbers and unmatched braces below);
// reconcile against the upstream Sequencer.C before editing. The visible
// body accumulates velocity-dependent half-step quantities ONLY (positions
// are not at half-step here): kinetic energy and velocity virial, optional
// per-slab pressure-profile contributions, internal (hydrogen-group-based)
// kinetic energy/virial, and the Multigrator momentum-squared sum — all
// folded into the reduction objects. The repeated 0.5 factors reflect that
// this routine runs twice per timestep.
5806 {
5807  NAMD_EVENT_RANGE_2(patch->flags.event_on, NamdProfileEvent::SUBMIT_HALFSTEP);
5808 
5809  // velocity-dependent quantities *** ONLY ***
5810  // positions are not at half-step when called
5811  FullAtom *a = patch->atom.begin();
5812  int numAtoms = patch->numAtoms;
5813 
5814 #if CMK_BLUEGENEL
5815  CmiNetworkProgressAfter (0);
5816 #endif
5817 
5818  // For non-Multigrator doKineticEnergy = 1 always
5819  Tensor momentumSqrSum;
5821  {
5822  BigReal kineticEnergy = 0;
5823  Tensor virial;
5824  if ( simParams->pairInteractionOn ) {
5825  if ( simParams->pairInteractionSelf ) {
5826  for ( int i = 0; i < numAtoms; ++i ) {
5827  if ( a[i].partition != 1 ) continue;
5828  kineticEnergy += a[i].mass * a[i].velocity.length2();
5829  virial.outerAdd(a[i].mass, a[i].velocity, a[i].velocity);
5830  }
5831  }
5832  } else {
5833  for ( int i = 0; i < numAtoms; ++i ) {
5834  if (a[i].mass < 0.01) continue;
5835  kineticEnergy += a[i].mass * a[i].velocity.length2();
5836  virial.outerAdd(a[i].mass, a[i].velocity, a[i].velocity);
5837  }
5838  }
5839 
5841  momentumSqrSum = virial;
5842  }
5843  kineticEnergy *= 0.5 * 0.5;
5845  virial *= 0.5;
5846  ADD_TENSOR_OBJECT(reduction,REDUCTION_VIRIAL_NORMAL,virial);
5847 #ifdef ALTVIRIAL
5848  ADD_TENSOR_OBJECT(reduction,REDUCTION_ALT_VIRIAL_NORMAL,virial);
5849 #endif
5850  }
5851 
5853  int nslabs = simParams->pressureProfileSlabs;
5854  const Lattice &lattice = patch->lattice;
5855  BigReal idz = nslabs/lattice.c().z;
5856  BigReal zmin = lattice.origin().z - 0.5*lattice.c().z;
5857  int useGroupPressure = simParams->useGroupPressure;
5858 
5859  // Compute kinetic energy partition, possibly subtracting off
5860  // internal kinetic energy if group pressure is enabled.
5861  // Since the regular pressure is 1/2 mvv and the internal kinetic
5862  // term that is subtracted off for the group pressure is
5863  // 1/2 mv (v-v_cm), the group pressure kinetic contribution is
5864  // 1/2 m * v * v_cm. The factor of 1/2 is because submitHalfstep
5865  // gets called twice per timestep.
5866  int hgs;
5867  for (int i=0; i<numAtoms; i += hgs) {
5868  int j, ppoffset;
5869  hgs = a[i].hydrogenGroupSize;
5870  int partition = a[i].partition;
5871 
5872  BigReal m_cm = 0;
5873  Velocity v_cm(0,0,0);
5874  for (j=i; j< i+hgs; ++j) {
5875  m_cm += a[j].mass;
5876  v_cm += a[j].mass * a[j].velocity;
5877  }
5878  v_cm /= m_cm;
5879  for (j=i; j < i+hgs; ++j) {
5880  BigReal mass = a[j].mass;
5881  if (! (useGroupPressure && j != i)) {
5882  BigReal z = a[j].position.z;
5883  int slab = (int)floor((z-zmin)*idz);
5884  if (slab < 0) slab += nslabs;
5885  else if (slab >= nslabs) slab -= nslabs;
5886  ppoffset = 3*(slab + partition*nslabs);
5887  }
5888  BigReal wxx, wyy, wzz;
5889  if (useGroupPressure) {
5890  wxx = 0.5*mass * a[j].velocity.x * v_cm.x;
5891  wyy = 0.5*mass * a[j].velocity.y * v_cm.y;
5892  wzz = 0.5*mass * a[j].velocity.z * v_cm.z;
5893  } else {
5894  wxx = 0.5*mass * a[j].velocity.x * a[j].velocity.x;
5895  wyy = 0.5*mass * a[j].velocity.y * a[j].velocity.y;
5896  wzz = 0.5*mass * a[j].velocity.z * a[j].velocity.z;
5897  }
5898  pressureProfileReduction->item(ppoffset ) += wxx;
5899  pressureProfileReduction->item(ppoffset+1) += wyy;
5900  pressureProfileReduction->item(ppoffset+2) += wzz;
5901  }
5902  }
5903  }
5904 
5905  // For non-Multigrator doKineticEnergy = 1 always
5907  {
5908  BigReal intKineticEnergy = 0;
5909  Tensor intVirialNormal;
5910 
5911  int hgs;
5912  for ( int i = 0; i < numAtoms; i += hgs ) {
5913 
5914 #if CMK_BLUEGENEL
5915  CmiNetworkProgress ();
5916 #endif
5917 
5918  hgs = a[i].hydrogenGroupSize;
5919  int j;
5920  BigReal m_cm = 0;
5921  Velocity v_cm(0,0,0);
5922  for ( j = i; j < (i+hgs); ++j ) {
5923  m_cm += a[j].mass;
5924  v_cm += a[j].mass * a[j].velocity;
5925  }
5927  momentumSqrSum.outerAdd(1.0/m_cm, v_cm, v_cm);
5928  }
5929  v_cm /= m_cm;
5930  if ( simParams->pairInteractionOn ) {
5931  if ( simParams->pairInteractionSelf ) {
5932  for ( j = i; j < (i+hgs); ++j ) {
5933  if ( a[j].partition != 1 ) continue;
5934  BigReal mass = a[j].mass;
5935  Vector v = a[j].velocity;
5936  Vector dv = v - v_cm;
5937  intKineticEnergy += mass * (v * dv);
5938  intVirialNormal.outerAdd (mass, v, dv);
5939  }
5940  }
5941  } else {
5942  for ( j = i; j < (i+hgs); ++j ) {
5943  BigReal mass = a[j].mass;
5944  Vector v = a[j].velocity;
5945  Vector dv = v - v_cm;
5946  intKineticEnergy += mass * (v * dv);
5947  intVirialNormal.outerAdd(mass, v, dv);
5948  }
5949  }
5950  }
5951  intKineticEnergy *= 0.5 * 0.5;
5953  intVirialNormal *= 0.5;
5954  ADD_TENSOR_OBJECT(reduction,REDUCTION_INT_VIRIAL_NORMAL,intVirialNormal);
5955  if ( simParams->multigratorOn) {
5956  momentumSqrSum *= 0.5;
5957  ADD_TENSOR_OBJECT(reduction,REDUCTION_MOMENTUM_SQUARED,momentumSqrSum);
5958  }
5959  }
5960 
5961 }
5962 
// Accumulate the virial and net-force corrections contributed by fixed
// atoms for the normal, nonbonded, and slow force classes. Since the
// constraint on a fixed atom cancels the force acting on it, each fixed
// atom's force-position outer product is subtracted (the -1.0 weights) and
// its force is subtracted from the external-force accumulators. All six
// output parameters are accumulated into, not reset.
5963 void Sequencer::calcFixVirial(Tensor& fixVirialNormal, Tensor& fixVirialNbond, Tensor& fixVirialSlow,
5964  Vector& fixForceNormal, Vector& fixForceNbond, Vector& fixForceSlow) {
5965 
5966  FullAtom *a = patch->atom.begin();
5967  int numAtoms = patch->numAtoms;
5968 
5969  for ( int j = 0; j < numAtoms; j++ ) {
5970  if ( simParams->fixedAtomsOn && a[j].atomFixed ) {
5971  Vector dx = a[j].fixedPosition;
5972  // all negative because fixed atoms cancels these forces
5973  fixVirialNormal.outerAdd(-1.0, patch->f[Results::normal][j], dx);
5974  fixVirialNbond.outerAdd(-1.0, patch->f[Results::nbond][j], dx);
5975  fixVirialSlow.outerAdd(-1.0, patch->f[Results::slow][j], dx);
5976  fixForceNormal -= patch->f[Results::normal][j];
5977  fixForceNbond -= patch->f[Results::nbond][j];
5978  fixForceSlow -= patch->f[Results::slow][j];
5979  }
5980  }
5981 }
5982 
// NOTE(review): extraction lost this function's signature (Doxygen 5983)
// and multiple interior lines (5997, 6001, 6051, 6053, 6060, 6091, 6149,
// 6155, 6237 — visible as gaps in the fused numbering, e.g. the reduction
// items that the dangling `+= drudeComKE;` / `+= drudeBondKE;` lines attach
// to); reconcile against the upstream Sequencer.C before editing. The
// visible body submits full-step reductions: atom checksum, kinetic energy,
// linear and angular momentum (with a Drude COM/bond kinetic-energy split
// when drudeOn), optional ALTVIRIAL tensors, internal hydrogen-group
// kinetic energy and virials, pressure-profile corrections, and fixed-atom
// virial/force corrections, then calls reduction->submit(). Most of the
// body is compiled out under UPPER_BOUND profiling builds.
5984 {
5985 #ifndef UPPER_BOUND
5986  NAMD_EVENT_RANGE_2(patch->flags.event_on,
5987  NamdProfileEvent::SUBMIT_REDUCTIONS);
5988  FullAtom *a = patch->atom.begin();
5989 #endif
5990  int numAtoms = patch->numAtoms;
5991 
5992 #if CMK_BLUEGENEL
5993  CmiNetworkProgressAfter(0);
5994 #endif
5995 
5996  reduction->item(REDUCTION_ATOM_CHECKSUM) += numAtoms;
5998 
5999 #ifndef UPPER_BOUND
6000  // For non-Multigrator doKineticEnergy = 1 always
6002  {
6003  BigReal kineticEnergy = 0;
6004  Vector momentum = 0;
6005  Vector angularMomentum = 0;
6006  Vector o = patch->lattice.origin();
6007  int i;
6008  if ( simParams->pairInteractionOn ) {
6009  if ( simParams->pairInteractionSelf ) {
6010  for (i = 0; i < numAtoms; ++i ) {
6011  if ( a[i].partition != 1 ) continue;
6012  kineticEnergy += a[i].mass * a[i].velocity.length2();
6013  momentum += a[i].mass * a[i].velocity;
6014  angularMomentum += cross(a[i].mass,a[i].position-o,a[i].velocity);
6015  }
6016  }
6017  } else {
6018  for (i = 0; i < numAtoms; ++i ) {
6019  kineticEnergy += a[i].mass * a[i].velocity.length2();
6020  momentum += a[i].mass * a[i].velocity;
6021  angularMomentum += cross(a[i].mass,a[i].position-o,a[i].velocity);
6022  }
6023  if (simParams->drudeOn) {
6024  BigReal drudeComKE = 0.;
6025  BigReal drudeBondKE = 0.;
6026 
6027  for (i = 0; i < numAtoms; i++) {
6028  if (i < numAtoms-1 &&
6029  a[i+1].mass < 1.0 && a[i+1].mass > 0.05) {
6030  // i+1 is a Drude particle with parent i
6031 
6032  // convert from Cartesian coordinates to (COM,bond) coordinates
6033  BigReal m_com = (a[i].mass + a[i+1].mass); // mass of COM
6034  BigReal m = a[i+1].mass / m_com; // mass ratio
6035  BigReal m_bond = a[i+1].mass * (1. - m); // mass of bond
6036  Vector v_bond = a[i+1].velocity - a[i].velocity; // vel of bond
6037  Vector v_com = a[i].velocity + m * v_bond; // vel of COM
6038 
6039  drudeComKE += m_com * v_com.length2();
6040  drudeBondKE += m_bond * v_bond.length2();
6041 
6042  i++; // +1 from loop, we've updated both particles
6043  }
6044  else {
6045  drudeComKE += a[i].mass * a[i].velocity.length2();
6046  }
6047  } // end for
6048 
6049  drudeComKE *= 0.5;
6050  drudeBondKE *= 0.5;
6052  += drudeComKE;
6054  += drudeBondKE;
6055  } // end drudeOn
6056 
6057  } // end else
6058 
6059  kineticEnergy *= 0.5;
6061  ADD_VECTOR_OBJECT(reduction,REDUCTION_MOMENTUM,momentum);
6062  ADD_VECTOR_OBJECT(reduction,REDUCTION_ANGULAR_MOMENTUM,angularMomentum);
6063  }
6064 
6065 #ifdef ALTVIRIAL
6066  // THIS IS NOT CORRECTED FOR PAIR INTERACTIONS
6067  {
6068  Tensor altVirial;
6069  for ( int i = 0; i < numAtoms; ++i ) {
6070  altVirial.outerAdd(1.0, patch->f[Results::normal][i], a[i].position);
6071  }
6072  ADD_TENSOR_OBJECT(reduction,REDUCTION_ALT_VIRIAL_NORMAL,altVirial);
6073  }
6074  {
6075  Tensor altVirial;
6076  for ( int i = 0; i < numAtoms; ++i ) {
6077  altVirial.outerAdd(1.0, patch->f[Results::nbond][i], a[i].position);
6078  }
6079  ADD_TENSOR_OBJECT(reduction,REDUCTION_ALT_VIRIAL_NBOND,altVirial);
6080  }
6081  {
6082  Tensor altVirial;
6083  for ( int i = 0; i < numAtoms; ++i ) {
6084  altVirial.outerAdd(1.0, patch->f[Results::slow][i], a[i].position);
6085  }
6086  ADD_TENSOR_OBJECT(reduction,REDUCTION_ALT_VIRIAL_SLOW,altVirial);
6087  }
6088 #endif
6089 
6090  // For non-Multigrator doKineticEnergy = 1 always
6092  {
6093  BigReal intKineticEnergy = 0;
6094  Tensor intVirialNormal;
6095  Tensor intVirialNbond;
6096  Tensor intVirialSlow;
6097 
6098  int hgs;
6099  for ( int i = 0; i < numAtoms; i += hgs ) {
6100 #if CMK_BLUEGENEL
6101  CmiNetworkProgress();
6102 #endif
6103  hgs = a[i].hydrogenGroupSize;
6104  int j;
6105  BigReal m_cm = 0;
6106  Position x_cm(0,0,0);
6107  Velocity v_cm(0,0,0);
6108  for ( j = i; j < (i+hgs); ++j ) {
6109  m_cm += a[j].mass;
6110  x_cm += a[j].mass * a[j].position;
6111  v_cm += a[j].mass * a[j].velocity;
6112  }
6113  x_cm /= m_cm;
6114  v_cm /= m_cm;
6115  int fixedAtomsOn = simParams->fixedAtomsOn;
6116  if ( simParams->pairInteractionOn ) {
6117  int pairInteractionSelf = simParams->pairInteractionSelf;
6118  for ( j = i; j < (i+hgs); ++j ) {
6119  if ( a[j].partition != 1 &&
6120  ( pairInteractionSelf || a[j].partition != 2 ) ) continue;
6121  // net force treated as zero for fixed atoms
6122  if ( fixedAtomsOn && a[j].atomFixed ) continue;
6123  BigReal mass = a[j].mass;
6124  Vector v = a[j].velocity;
6125  Vector dv = v - v_cm;
6126  intKineticEnergy += mass * (v * dv);
6127  Vector dx = a[j].position - x_cm;
6128  intVirialNormal.outerAdd(1.0, patch->f[Results::normal][j], dx);
6129  intVirialNbond.outerAdd(1.0, patch->f[Results::nbond][j], dx);
6130  intVirialSlow.outerAdd(1.0, patch->f[Results::slow][j], dx);
6131  }
6132  } else {
6133  for ( j = i; j < (i+hgs); ++j ) {
6134  // net force treated as zero for fixed atoms
6135  if ( fixedAtomsOn && a[j].atomFixed ) continue;
6136  BigReal mass = a[j].mass;
6137  Vector v = a[j].velocity;
6138  Vector dv = v - v_cm;
6139  intKineticEnergy += mass * (v * dv);
6140  Vector dx = a[j].position - x_cm;
6141  intVirialNormal.outerAdd(1.0, patch->f[Results::normal][j], dx);
6142  intVirialNbond.outerAdd(1.0, patch->f[Results::nbond][j], dx);
6143  intVirialSlow.outerAdd(1.0, patch->f[Results::slow][j], dx);
6144  }
6145  }
6146  }
6147 
6148  intKineticEnergy *= 0.5;
6150  ADD_TENSOR_OBJECT(reduction,REDUCTION_INT_VIRIAL_NORMAL,intVirialNormal);
6151  ADD_TENSOR_OBJECT(reduction,REDUCTION_INT_VIRIAL_NBOND,intVirialNbond);
6152  ADD_TENSOR_OBJECT(reduction,REDUCTION_INT_VIRIAL_SLOW,intVirialSlow);
6153  }
6154 
6156  // subtract off internal virial term, calculated as for intVirial.
6157  int nslabs = simParams->pressureProfileSlabs;
6158  const Lattice &lattice = patch->lattice;
6159  BigReal idz = nslabs/lattice.c().z;
6160  BigReal zmin = lattice.origin().z - 0.5*lattice.c().z;
6161  int useGroupPressure = simParams->useGroupPressure;
6162 
6163  int hgs;
6164  for (int i=0; i<numAtoms; i += hgs) {
6165  int j;
6166  hgs = a[i].hydrogenGroupSize;
6167  BigReal m_cm = 0;
6168  Position x_cm(0,0,0);
6169  for (j=i; j< i+hgs; ++j) {
6170  m_cm += a[j].mass;
6171  x_cm += a[j].mass * a[j].position;
6172  }
6173  x_cm /= m_cm;
6174 
6175  BigReal z = a[i].position.z;
6176  int slab = (int)floor((z-zmin)*idz);
6177  if (slab < 0) slab += nslabs;
6178  else if (slab >= nslabs) slab -= nslabs;
6179  int partition = a[i].partition;
6180  int ppoffset = 3*(slab + nslabs*partition);
6181  for (j=i; j < i+hgs; ++j) {
6182  BigReal mass = a[j].mass;
6183  Vector dx = a[j].position - x_cm;
6184  const Vector &fnormal = patch->f[Results::normal][j];
6185  const Vector &fnbond = patch->f[Results::nbond][j];
6186  const Vector &fslow = patch->f[Results::slow][j];
6187  BigReal wxx = (fnormal.x + fnbond.x + fslow.x) * dx.x;
6188  BigReal wyy = (fnormal.y + fnbond.y + fslow.y) * dx.y;
6189  BigReal wzz = (fnormal.z + fnbond.z + fslow.z) * dx.z;
6190  pressureProfileReduction->item(ppoffset ) -= wxx;
6191  pressureProfileReduction->item(ppoffset+1) -= wyy;
6192  pressureProfileReduction->item(ppoffset+2) -= wzz;
6193  }
6194  }
6195  }
6196 
6197  // For non-Multigrator doVirial = 1 always
6198  if (patch->flags.doVirial)
6199  {
6200  if ( simParams->fixedAtomsOn ) {
6201  Tensor fixVirialNormal;
6202  Tensor fixVirialNbond;
6203  Tensor fixVirialSlow;
6204  Vector fixForceNormal = 0;
6205  Vector fixForceNbond = 0;
6206  Vector fixForceSlow = 0;
6207 
6208  calcFixVirial(fixVirialNormal, fixVirialNbond, fixVirialSlow, fixForceNormal, fixForceNbond, fixForceSlow);
6209 
6210 #if 0
6211  auto printTensor = [](const Tensor& t, const std::string& name){
6212  CkPrintf("%s", name.c_str());
6213  CkPrintf("\n%12.5lf %12.5lf %12.5lf\n"
6214  "%12.5lf %12.5lf %12.5lf\n"
6215  "%12.5lf %12.5lf %12.5lf\n",
6216  t.xx, t.xy, t.xz,
6217  t.yx, t.yy, t.yz,
6218  t.zx, t.zy, t.zz);
6219  };
6220  printTensor(fixVirialNormal, "fixVirialNormal = ");
6221  printTensor(fixVirialNbond, "fixVirialNbond = ");
6222  printTensor(fixVirialSlow, "fixVirialSlow = ");
6223 #endif
6224 
6225  ADD_TENSOR_OBJECT(reduction,REDUCTION_VIRIAL_NORMAL,fixVirialNormal);
6226  ADD_TENSOR_OBJECT(reduction,REDUCTION_VIRIAL_NBOND,fixVirialNbond);
6227  ADD_TENSOR_OBJECT(reduction,REDUCTION_VIRIAL_SLOW,fixVirialSlow);
6228  ADD_VECTOR_OBJECT(reduction,REDUCTION_EXT_FORCE_NORMAL,fixForceNormal);
6229  ADD_VECTOR_OBJECT(reduction,REDUCTION_EXT_FORCE_NBOND,fixForceNbond);
6230  ADD_VECTOR_OBJECT(reduction,REDUCTION_EXT_FORCE_SLOW,fixForceSlow);
6231  }
6232  }
6233 #endif // UPPER_BOUND
6234 
6235  reduction->submit();
6236 #ifndef UPPER_BOUND
6238 #endif
6239 }
6240 
// NOTE(review): extraction lost this function's signature (Doxygen 6241 —
// presumably takes the step and an `fmax2` force-magnitude bound, which the
// body reads) and lines 6332-6335 (just after the debug printf block,
// where fdotf/fdotv/vdotv/numHuge are presumably submitted); reconcile
// against the upstream Sequencer.C before editing. The visible body:
// collapses nbond+slow forces into the normal force array (then nulls the
// local f2/f3 aliases), applies the Drude hard-wall positional adjustment
// and force transfer to parents, zeroes forces on fixed atoms, finds the
// maximum velocity/force magnitude, optionally applies minimize_rattle2,
// rescales over-large forces (with 1.01 padding so minimizeMoveDownhill()
// still sees them), accumulates f.f / f.v / v.v dot products, then submits
// internal and fixed-atom virials and the reduction.
6242 {
6243  FullAtom *a = patch->atom.begin();
6244  Force *f1 = patch->f[Results::normal].begin();
6245  Force *f2 = patch->f[Results::nbond].begin();
6246  Force *f3 = patch->f[Results::slow].begin();
6247  const bool fixedAtomsOn = simParams->fixedAtomsOn;
6248  const bool drudeHardWallOn = simParams->drudeHardWallOn;
6249  const double drudeBondLen = simParams->drudeBondLen;
6250  const double drudeBondLen2 = drudeBondLen * drudeBondLen;
6251  const double drudeStep = 0.1/(TIMEFACTOR*TIMEFACTOR);
6252  const double drudeMove = 0.01;
6253  const double drudeStep2 = drudeStep * drudeStep;
6254  const double drudeMove2 = drudeMove * drudeMove;
6255  int numAtoms = patch->numAtoms;
6256 
6257  reduction->item(REDUCTION_ATOM_CHECKSUM) += numAtoms;
6258 
6259  for ( int i = 0; i < numAtoms; ++i ) {
6260 #if 0
6261  printf("ap[%2d]= %f %f %f\n", i, a[i].position.x, a[i].position.y, a[i].position.z);
6262  printf("f1[%2d]= %f %f %f\n", i, f1[i].x, f1[i].y, f1[i].z);
6263  printf("f2[%2d]= %f %f %f\n", i, f2[i].x, f2[i].y, f2[i].z);
6264  //printf("f3[%2d]= %f %f %f\n", i, f3[i].x, f3[i].y, f3[i].z);
6265 #endif
6266  f1[i] += f2[i] + f3[i]; // add all forces
6267  if ( drudeHardWallOn && i && (a[i].mass > 0.05) && ((a[i].mass < 1.0)) ) { // drude particle
6268  if ( ! fixedAtomsOn || ! a[i].atomFixed ) {
6269  if ( drudeStep2 * f1[i].length2() > drudeMove2 ) {
6270  a[i].position += drudeMove * f1[i].unit();
6271  } else {
6272  a[i].position += drudeStep * f1[i];
6273  }
6274  if ( (a[i].position - a[i-1].position).length2() > drudeBondLen2 ) {
6275  a[i].position = a[i-1].position + drudeBondLen * (a[i].position - a[i-1].position).unit();
6276  }
6277  }
6278  Vector netf = f1[i-1] + f1[i];
6279  if ( fixedAtomsOn && a[i-1].atomFixed ) netf = 0;
6280  f1[i-1] = netf;
6281  f1[i] = 0.;
6282  }
6283  if ( fixedAtomsOn && a[i].atomFixed ) f1[i] = 0;
6284  }
6285 
6286  f2 = f3 = 0; // included in f1
6287 
6288  BigReal maxv2 = 0.;
6289 
6290  for ( int i = 0; i < numAtoms; ++i ) {
6291  BigReal v2 = a[i].velocity.length2();
6292  if ( v2 > 0. ) {
6293  if ( v2 > maxv2 ) maxv2 = v2;
6294  } else {
6295  v2 = f1[i].length2();
6296  if ( v2 > maxv2 ) maxv2 = v2;
6297  }
6298  }
6299 
6300  if ( fmax2 > 10. * TIMEFACTOR * TIMEFACTOR * TIMEFACTOR * TIMEFACTOR )
6301  { Tensor virial; patch->minimize_rattle2( 0.1 * TIMEFACTOR / sqrt(maxv2), &virial, true /* forces */); }
6302 
6303  BigReal fdotf = 0;
6304  BigReal fdotv = 0;
6305  BigReal vdotv = 0;
6306  int numHuge = 0;
6307  for ( int i = 0; i < numAtoms; ++i ) {
6308  if ( simParams->fixedAtomsOn && a[i].atomFixed ) continue;
6309  if ( drudeHardWallOn && (a[i].mass > 0.05) && ((a[i].mass < 1.0)) ) continue; // drude particle
6310  Force f = f1[i];
6311  BigReal ff = f * f;
6312  if ( ff > fmax2 ) {
6313  if (simParams->printBadContacts) {
6314  CkPrintf("STEP(%i) MIN_HUGE[%i] f=%e kcal/mol/A\n",patch->flags.sequence,patch->pExt[i].id,ff);
6315  }
6316  ++numHuge;
6317  // pad scaling so minimizeMoveDownhill() doesn't miss them
6318  BigReal fmult = 1.01 * sqrt(fmax2/ff);
6319  f *= fmult; ff = f * f;
6320  f1[i] *= fmult;
6321  }
6322  fdotf += ff;
6323  fdotv += f * a[i].velocity;
6324  vdotv += a[i].velocity * a[i].velocity;
6325  }
6326 
6327 #if 0
6328  printf("fdotf = %f\n", fdotf);
6329  printf("fdotv = %f\n", fdotv);
6330  printf("vdotv = %f\n", vdotv);
6331 #endif
6336 
6337  {
6338  Tensor intVirialNormal;
6339  Tensor intVirialNbond;
6340  Tensor intVirialSlow;
6341 
6342  int hgs;
6343  for ( int i = 0; i < numAtoms; i += hgs ) {
6344  hgs = a[i].hydrogenGroupSize;
6345  int j;
6346  BigReal m_cm = 0;
6347  Position x_cm(0,0,0);
6348  for ( j = i; j < (i+hgs); ++j ) {
6349  m_cm += a[j].mass;
6350  x_cm += a[j].mass * a[j].position;
6351  }
6352  x_cm /= m_cm;
6353  for ( j = i; j < (i+hgs); ++j ) {
6354  BigReal mass = a[j].mass;
6355  // net force treated as zero for fixed atoms
6356  if ( simParams->fixedAtomsOn && a[j].atomFixed ) continue;
6357  Vector dx = a[j].position - x_cm;
6358  intVirialNormal.outerAdd(1.0, patch->f[Results::normal][j], dx);
6359  intVirialNbond.outerAdd(1.0, patch->f[Results::nbond][j], dx);
6360  intVirialSlow.outerAdd(1.0, patch->f[Results::slow][j], dx);
6361  }
6362  }
6363 
6364  ADD_TENSOR_OBJECT(reduction,REDUCTION_INT_VIRIAL_NORMAL,intVirialNormal);
6365  ADD_TENSOR_OBJECT(reduction,REDUCTION_INT_VIRIAL_NBOND,intVirialNbond);
6366  ADD_TENSOR_OBJECT(reduction,REDUCTION_INT_VIRIAL_SLOW,intVirialSlow);
6367  }
6368 
6369  if ( simParams->fixedAtomsOn ) {
6370  Tensor fixVirialNormal;
6371  Tensor fixVirialNbond;
6372  Tensor fixVirialSlow;
6373  Vector fixForceNormal = 0;
6374  Vector fixForceNbond = 0;
6375  Vector fixForceSlow = 0;
6376 
6377  calcFixVirial(fixVirialNormal, fixVirialNbond, fixVirialSlow, fixForceNormal, fixForceNbond, fixForceSlow);
6378 
6379  ADD_TENSOR_OBJECT(reduction,REDUCTION_VIRIAL_NORMAL,fixVirialNormal);
6380  ADD_TENSOR_OBJECT(reduction,REDUCTION_VIRIAL_NBOND,fixVirialNbond);
6381  ADD_TENSOR_OBJECT(reduction,REDUCTION_VIRIAL_SLOW,fixVirialSlow);
6382  ADD_VECTOR_OBJECT(reduction,REDUCTION_EXT_FORCE_NORMAL,fixForceNormal);
6383  ADD_VECTOR_OBJECT(reduction,REDUCTION_EXT_FORCE_NBOND,fixForceNbond);
6384  ADD_VECTOR_OBJECT(reduction,REDUCTION_EXT_FORCE_SLOW,fixForceSlow);
6385  }
6386 
6387  reduction->submit();
6388 }
6389 
// Submit trajectory output for this step to the collection manager:
// positions (with DCD selection support), velocities, and forces, each
// gated on whether Output says that quantity is needed at `step`.
// NOTE(review): Doxygen lines 6400, 6402, and 6404 inside the IMD
// conditional were lost in extraction — the condition tail and the body of
// the IMD branch are missing; confirm against the upstream source.
6390 void Sequencer::submitCollections(int step, int zeroVel)
6391 {
6392  //
6393  // DJH: Copy updates of SOA back into AOS.
6394  // Do we need to update everything or is it safe to just update
6395  // positions and velocities separately, as needed?
6396  //
6397  //patch->copy_updates_to_AOS();
6398 
6399  if (simParams->IMDon &&
6401  !(step % simParams->IMDfreq) &&
6403  (step != simParams->firstTimestep)) {
6405  }
6406 
6407  NAMD_EVENT_RANGE_2(patch->flags.event_on,
6408  NamdProfileEvent::SUBMIT_COLLECTIONS);
6409  int prec;
6410  int dcdSelectionIndex;
6411  std::tie (prec, dcdSelectionIndex) = Output::coordinateNeeded(step);
6412  if ( prec ) {
6413  collection->submitPositions(step,patch->atom,patch->lattice,prec,dcdSelectionIndex);
6414  }
6415  prec = Output::velocityNeeded(step);
6416  if ( prec ) {
6417  collection->submitVelocities(step,zeroVel,patch->atom,prec);
6418  }
6419  prec = Output::forceNeeded(step);
6420  if ( prec ) {
6421  int maxForceUsed = patch->flags.maxForceUsed;
6422  if ( maxForceUsed > Results::slow ) maxForceUsed = Results::slow;
6423  collection->submitForces(step,patch->atom,maxForceUsed,patch->f,prec);
6424  }
6425 }
6426 
6427 void Sequencer::runComputeObjects(int migration, int pairlists, int pressureStep)
6428 {
6429  if ( migration ) pairlistsAreValid = 0;
6430 #if defined(NAMD_CUDA) || defined(NAMD_HIP) || defined(NAMD_MIC)
6431  if ( pairlistsAreValid &&
6433  && ( pairlistsAge > pairlistsAgeLimit ) ) {
6434  pairlistsAreValid = 0;
6435  }
6436 #else
6438  pairlistsAreValid = 0;
6439  }
6440 #endif
6441  if ( ! simParams->usePairlists ) pairlists = 0;
6442  patch->flags.usePairlists = pairlists || pairlistsAreValid;
6444  pairlists && ! pairlistsAreValid;
6445 
6446  if ( simParams->singleTopology ) patch->reposition_all_alchpairs();
6447  if ( simParams->lonepairs ) patch->reposition_all_lonepairs();
6448 
6449  //
6450  // DJH: Copy updates of SOA back into AOS.
6451  // The positionsReady() routine starts force computation and atom migration.
6452  //
6453  // We could reduce amount of copying here by checking migration status
6454  // and copying velocities only when migrating. Some types of simulation
6455  // always require velocities, such as Lowe-Anderson.
6456  //
6457  //patch->copy_updates_to_AOS();
6458 
6459  patch->positionsReady(migration); // updates flags.sequence
6460 
6461  int seq = patch->flags.sequence;
6462  int basePriority = ( (seq & 0xffff) << 15 )
6464  if ( patch->flags.doGBIS && patch->flags.doNonbonded) {
6465  priority = basePriority + GB1_COMPUTE_HOME_PRIORITY;
6466  suspend(); // until all deposit boxes close
6468  priority = basePriority + GB2_COMPUTE_HOME_PRIORITY;
6469  suspend();
6471  priority = basePriority + COMPUTE_HOME_PRIORITY;
6472  suspend();
6473  } else {
6474  priority = basePriority + COMPUTE_HOME_PRIORITY;
6475  suspend(); // until all deposit boxes close
6476  }
6477 
6478  //
6479  // DJH: Copy all data into SOA from AOS.
6480  //
6481  // We need everything copied after atom migration.
6482  // When doing force computation without atom migration,
6483  // all data except forces will already be up-to-date in SOA
6484  // (except maybe for some special types of simulation).
6485  //
6486  //patch->copy_all_to_SOA();
6487 
6488  //
6489  // DJH: Copy forces to SOA.
6490  // Force available after suspend() has returned.
6491  //
6492  //patch->copy_forces_to_SOA();
6493 
6495  pairlistsAreValid = 1;
6496  pairlistsAge = 0;
6497  }
6498  // For multigrator, do not age pairlist during pressure step
6499  // NOTE: for non-multigrator pressureStep = 0 always
6500  if ( pairlistsAreValid && !pressureStep ) ++pairlistsAge;
6501 
6502  if (simParams->lonepairs) {
6503  {
6504  Tensor virial;
6505  patch->redistrib_lonepair_forces(Results::normal, &virial);
6506  ADD_TENSOR_OBJECT(reduction, REDUCTION_VIRIAL_NORMAL, virial);
6507  }
6508  if (patch->flags.doNonbonded) {
6509  Tensor virial;
6510  patch->redistrib_lonepair_forces(Results::nbond, &virial);
6511  ADD_TENSOR_OBJECT(reduction, REDUCTION_VIRIAL_NBOND, virial);
6512  }
6514  Tensor virial;
6515  patch->redistrib_lonepair_forces(Results::slow, &virial);
6516  ADD_TENSOR_OBJECT(reduction, REDUCTION_VIRIAL_SLOW, virial);
6517  }
6518  } else if (simParams->watmodel == WaterModel::TIP4) {
6519  {
6520  Tensor virial;
6521  patch->redistrib_tip4p_forces(Results::normal, &virial);
6522  ADD_TENSOR_OBJECT(reduction, REDUCTION_VIRIAL_NORMAL, virial);
6523  }
6524  if (patch->flags.doNonbonded) {
6525  Tensor virial;
6526  patch->redistrib_tip4p_forces(Results::nbond, &virial);
6527  ADD_TENSOR_OBJECT(reduction, REDUCTION_VIRIAL_NBOND, virial);
6528  }
6530  Tensor virial;
6531  patch->redistrib_tip4p_forces(Results::slow, &virial);
6532  ADD_TENSOR_OBJECT(reduction, REDUCTION_VIRIAL_SLOW, virial);
6533  }
6534  } else if (simParams->watmodel == WaterModel::SWM4) {
6535  {
6536  Tensor virial;
6537  patch->redistrib_swm4_forces(Results::normal, &virial);
6538  ADD_TENSOR_OBJECT(reduction, REDUCTION_VIRIAL_NORMAL, virial);
6539  }
6540  if (patch->flags.doNonbonded) {
6541  Tensor virial;
6542  patch->redistrib_swm4_forces(Results::nbond, &virial);
6543  ADD_TENSOR_OBJECT(reduction, REDUCTION_VIRIAL_NBOND, virial);
6544  }
6546  Tensor virial;
6547  patch->redistrib_swm4_forces(Results::slow, &virial);
6548  ADD_TENSOR_OBJECT(reduction, REDUCTION_VIRIAL_SLOW, virial);
6549  }
6550  }
6551 
6552  if (simParams->singleTopology) {
6553  patch->redistrib_alchpair_forces(Results::normal);
6554  if (patch->flags.doNonbonded) {
6555  patch->redistrib_alchpair_forces(Results::nbond);
6556  }
6558  patch->redistrib_alchpair_forces(Results::slow);
6559  }
6560  }
6561 
6562  if ( patch->flags.doMolly ) {
6563  Tensor virial;
6564  patch->mollyMollify(&virial);
6565  ADD_TENSOR_OBJECT(reduction,REDUCTION_VIRIAL_SLOW,virial);
6566  }
6567 
6568 
6569  // BEGIN LA
6570  if (patch->flags.doLoweAndersen) {
6572  }
6573  // END LA
6574 //TODO:HIP check if this applies to HIP
6575 #ifdef NAMD_CUDA_XXX
6576  int numAtoms = patch->numAtoms;
6577  FullAtom *a = patch->atom.begin();
6578  for ( int i=0; i<numAtoms; ++i ) {
6579  CkPrintf("%d %g %g %g\n", a[i].id,
6580  patch->f[Results::normal][i].x +
6581  patch->f[Results::nbond][i].x +
6582  patch->f[Results::slow][i].x,
6583  patch->f[Results::normal][i].y +
6584  patch->f[Results::nbond][i].y +
6585  patch->f[Results::slow][i].y,
6586  patch->f[Results::normal][i].z +
6587  patch->f[Results::nbond][i].z +
6588  patch->f[Results::slow][i].z);
6589  CkPrintf("%d %g %g %g\n", a[i].id,
6590  patch->f[Results::normal][i].x,
6591  patch->f[Results::nbond][i].x,
6592  patch->f[Results::slow][i].x);
6593  CkPrintf("%d %g %g %g\n", a[i].id,
6594  patch->f[Results::normal][i].y,
6595  patch->f[Results::nbond][i].y,
6596  patch->f[Results::slow][i].y);
6597  CkPrintf("%d %g %g %g\n", a[i].id,
6598  patch->f[Results::normal][i].z,
6599  patch->f[Results::nbond][i].z,
6600  patch->f[Results::slow][i].z);
6601  }
6602 #endif
6603 
6604 //#undef PRINT_FORCES
6605 //#define PRINT_FORCES 1
6606 #if PRINT_FORCES
6607  int numAtoms = patch->numAtoms;
6608  FullAtom *a = patch->atom.begin();
6609  for ( int i=0; i<numAtoms; ++i ) {
6610  float fxNo = patch->f[Results::normal][i].x;
6611  float fxNb = patch->f[Results::nbond][i].x;
6612  float fxSl = patch->f[Results::slow][i].x;
6613  float fyNo = patch->f[Results::normal][i].y;
6614  float fyNb = patch->f[Results::nbond][i].y;
6615  float fySl = patch->f[Results::slow][i].y;
6616  float fzNo = patch->f[Results::normal][i].z;
6617  float fzNb = patch->f[Results::nbond][i].z;
6618  float fzSl = patch->f[Results::slow][i].z;
6619  float fx = fxNo+fxNb+fxSl;
6620  float fy = fyNo+fyNb+fySl;
6621  float fz = fzNo+fzNb+fzSl;
6622 
6623  float f = sqrt(fx*fx+fy*fy+fz*fz);
6624  int id = patch->pExt[i].id;
6625  int seq = patch->flags.sequence;
6626  float x = patch->p[i].position.x;
6627  float y = patch->p[i].position.y;
6628  float z = patch->p[i].position.z;
6629  //CkPrintf("FORCE(%04i)[%04i] = <% .4e, % .4e, % .4e> <% .4e, % .4e, % .4e> <% .4e, % .4e, % .4e> <<% .4e, % .4e, % .4e>>\n", seq,id,
6630  CkPrintf("FORCE(%04i)[%04i] = % .9e % .9e % .9e\n", seq,id,
6631  //CkPrintf("FORCE(%04i)[%04i] = <% .4e, % .4e, % .4e> <% .4e, % .4e, % .4e> <% .4e, % .4e, % .4e>\n", seq,id,
6632 //fxNo,fyNo,fzNo,
6633 fxNb,fyNb,fzNb
6634 //fxSl,fySl,fzSl,
6635 //fx,fy,fz
6636 );
6637  }
6638 #endif
6639 }
6640 
6641 void Sequencer::rebalanceLoad(int timestep) {
6642  if ( ! ldbSteps ) {
6644  }
6645  if ( ! --ldbSteps ) {
6646  patch->submitLoadStats(timestep);
6647  ldbCoordinator->rebalance(this,patch->getPatchID());
6648  pairlistsAreValid = 0;
6649  }
6650 }
6651 
6652 void Sequencer::cycleBarrier(int doBarrier, int step) {
6653 #if USE_BARRIER
6654  if (doBarrier)
6655  // Blocking receive for the cycle barrier.
6656  broadcast->cycleBarrier.get(step);
6657 #endif
6658 }
6659 
6661  // Blocking receive for the trace barrier.
6662  broadcast->traceBarrier.get(step);
6663 }
6664 
#ifdef MEASURE_NAMD_WITH_PAPI
// Block on the PAPI measure barrier broadcast for the given step.
// Only built when PAPI-based measurement support is compiled in.
void Sequencer::papiMeasureBarrier(int step) {
  // Blocking receive for the PAPI measure barrier.
  broadcast->papiMeasureBarrier.get(step);
}
#endif
6671 
6674  CthFree(thread);
6675  CthSuspend();
6676 }
static Node * Object()
Definition: Node.h:86
HomePatch *const patch
Definition: Sequencer.h:323
Real atomcharge(int anum) const
Definition: Molecule.h:1124
SubmitReduction * multigratorReduction
Definition: Sequencer.h:309
Vector gaussian_vector(void)
Definition: Random.h:219
void rescaleVelocities(int)
Definition: Sequencer.C:5405
void finishReduction(bool doEnergyVirial)
double * vel_y
Definition: NamdTypes.h:397
int doKineticEnergy
Definition: Sequencer.h:310
void minimizationQuenchVelocity(void)
Definition: Sequencer.C:5794
int period
period for some step dependent event (e.g. stepsPerCycle)
Definition: Sequencer.C:143
NAMD_HOST_DEVICE void rescale(Tensor factor)
Definition: Lattice.h:60
void max(int i, BigReal v)
Definition: ReductionMgr.h:349
int init(int initstep, int initperiod, int delta=0)
Definition: Sequencer.C:159
#define RECIP_TIMEFACTOR
Definition: common.h:61
DCDParams dcdSelectionParams[16]
Definition: Molecule.h:482
void barrier(const SynchronousCollectiveScope scope)
BigReal zy
Definition: Tensor.h:19
Real langevinParam
Definition: NamdTypes.h:220
Bool berendsenPressureOn
void tcoupleVelocities(BigReal, int)
Definition: Sequencer.C:5576
void addMovDragToPosition(BigReal)
Definition: Sequencer.C:4414
void terminate(void)
Definition: Sequencer.C:6672
BigReal soluteScalingFactorCharge
void submitForces(int seq, FullAtomList &a, int maxForceUsed, ForceList *f, int prec)
virtual void algorithm(void)
Definition: Sequencer.C:289
void get_rotdrag_params(BigReal &v, Vector &a, Vector &p, int atomnum) const
Definition: Molecule.h:1419
void langevinVelocitiesBBK2_SOA(BigReal timestep)
Definition: Sequencer.C:3343
#define NAMD_EVENT_STOP(eon, id)
int frequency
Definition: common.h:255
Bool is_atom_movdragged(int atomnum) const
Definition: Molecule.h:1289
SubmitReduction * pressureProfileReduction
Definition: Sequencer.h:325
void suspendULTs()
int getNumAtoms() const
Definition: Patch.h:105
void minimize_rattle2(const BigReal, Tensor *virial, bool forces=false)
Definition: HomePatch.C:4382
void integrate(int)
Definition: Sequencer.C:3887
friend class SequencerCUDA
Definition: Sequencer.h:49
HomePatch * patch
Definition: HomePatchList.h:23
Definition: PDB.h:36
void scaleVelocities(const BigReal velScale)
Definition: Sequencer.C:4946
void positionsReady_SOA(int doMigration=0)
Definition: HomePatch.C:971
#define GB1_COMPUTE_HOME_PRIORITY
Definition: Priorities.h:56
void addVelocityToPosition(BigReal)
Definition: Sequencer.C:5665
SubmitReduction * reduction
Definition: Sequencer.h:324
NAMD_HOST_DEVICE Vector c() const
Definition: Lattice.h:270
BigReal xz
Definition: Tensor.h:17
SubmitReduction * min_reduction
Definition: Sequencer.h:228
std::shared_ptr< CudaGlobalMasterServer > getCudaGlobalMaster()
Bool is_atom_exPressure(int atomnum) const
Definition: Molecule.h:1544
SimpleBroadcastObject< int > traceBarrier
Definition: Broadcasts.h:89
BigReal accelMDLastStep
void maximumMove(BigReal)
Definition: Sequencer.C:5749
Bool monteCarloPressureOn
int marginViolations
Definition: HomePatch.h:401
Definition: common.h:275
#define BOLTZMANN
Definition: common.h:54
Definition: Node.h:78
double * f_normal_z
Definition: NamdTypes.h:430
const GlobalMasterIMD * getIMD()
Definition: IMDOutput.h:43
void cycleBarrier(int, int)
Definition: Sequencer.C:6652
#define FILE_OUTPUT
Definition: Output.h:25
IMDOutput * imd
Definition: Node.h:186
double * f_normal_y
Definition: NamdTypes.h:429
Position fixedPosition
Definition: NamdTypes.h:212
Lattice & lattice
Definition: Patch.h:127
void submitCollections_SOA(int step, int zeroVel=0)
Definition: Sequencer.C:3190
Bool globalMasterScaleByFrequency
static void partition(int *order, const FullAtom *atoms, int begin, int end)
Definition: SortAtoms.C:45
SimpleBroadcastObject< Vector > momentumCorrection
Definition: Broadcasts.h:82
void addRotDragToPosition(BigReal)
Definition: Sequencer.C:4433
static PatchMap * Object()
Definition: PatchMap.h:27
void saveForce(const int ftag=Results::normal)
Definition: Sequencer.C:5615
void registerIDsFullAtom(const FullAtom *begin, const FullAtom *end)
Definition: AtomMap.C:50
CmiNodeLock printlock
Definition: PatchData.h:163
#define EVAL_MEASURE
Definition: Output.h:27
double * f_slow_y
Definition: NamdTypes.h:435
Definition: Vector.h:72
void langevinVelocitiesBBK2(BigReal)
Definition: Sequencer.C:5154
void monteCarloPressureControl(const int step, const int doMigration, const int doEnergy, const int doVirial, const int maxForceNumber, const int doGlobal)
virtual void submit(void)=0
Output * output
Definition: Node.h:185
#define ADD_TENSOR_OBJECT(R, RL, D)
Definition: ReductionMgr.h:44
SimParameters * simParameters
Definition: Node.h:181
int slowFreq
Definition: Sequencer.h:297
void addForceToMomentum(FullAtom *__restrict atom_arr, const Force *__restrict force_arr, const BigReal dt, int num_atoms) __attribute__((__noinline__))
Definition: HomePatch.C:3319
void newMinimizeDirection(BigReal)
Definition: Sequencer.C:4624
void newMinimizePosition(BigReal)
Definition: Sequencer.C:4683
double stochRescaleCoefficient()
Definition: Controller.C:1784
Bool CUDASOAintegrateMode
int rattle1(const BigReal, Tensor *virial, SubmitReduction *)
Definition: HomePatch.C:3788
int savePairlists
Definition: PatchTypes.h:41
int nextstep
next step value
Definition: Sequencer.C:144
bool masterThread
Definition: Sequencer.h:331
BigReal reassignTemp
BigReal & item(int i)
Definition: ReductionMgr.h:336
void gbisComputeAfterP2()
Definition: HomePatch.C:4943
#define DebugM(x, y)
Definition: Debug.h:75
void startWork(const LDObjHandle &handle)
HomePatchList * homePatchList()
Definition: PatchMap.C:438
void langevinVelocitiesBBK1(BigReal)
Definition: Sequencer.C:5081
std::ostream & endi(std::ostream &s)
Definition: InfoStream.C:54
BigReal z
Definition: Vector.h:74
char const *const NamdProfileEventStr[]
int getNumDevice()
Definition: DeviceCUDA.h:125
int usePairlists
Definition: PatchTypes.h:40
Position position
Definition: NamdTypes.h:78
BigReal rotDragGlobVel
BigReal yz
Definition: Tensor.h:18
void updateDevicePatchMap(int startup)
int berendsenPressureFreq
SubmitReduction * willSubmit(int setID, int size=-1)
Definition: ReductionMgr.C:368
void rattle1(BigReal, int)
Definition: Sequencer.C:5692
void saveTotalForces(HomePatch *)
SimpleBroadcastObject< BigReal > adaptTemperature
Definition: Broadcasts.h:92
unsigned char get_ss_type(int anum) const
Definition: Molecule.h:1448
void rebalanceLoad(int timestep)
Definition: Sequencer.C:6641
void submitHalfstep(int)
Definition: Sequencer.C:5805
Bool globalMasterStaleForces
static ReductionMgr * Object(void)
Definition: ReductionMgr.h:290
#define iout
Definition: InfoStream.h:51
void addForceToMomentum_SOA(const double scaling, double dt_normal, double dt_nbond, double dt_slow, int maxForceNumber)
Definition: Sequencer.C:2743
int doLoweAndersen
Definition: PatchTypes.h:28
Velocity velocity
Definition: NamdTypes.h:211
int pressureProfileSlabs
void minimizeMoveDownhill(BigReal fmax2)
Definition: Sequencer.C:4602
Patch * patch(PatchID pid)
Definition: PatchMap.h:244
void addForceToMomentum(BigReal, const int ftag=Results::normal, const int useSaved=0)
Definition: Sequencer.C:5626
void submitReductions_SOA()
Definition: Sequencer.C:2985
std::vector< PatchRecord > & getPatches()
static PatchMap * ObjectOnPe(int pe)
Definition: PatchMap.h:28
float * langScalVelBBK2
derived from langevinParam
Definition: NamdTypes.h:419
uint32 groupFixed
Definition: NamdTypes.h:163
void pauseWork(const LDObjHandle &handle)
void langevinPiston(int)
Definition: Sequencer.C:5325
SimpleBroadcastObject< BigReal > tcoupleCoefficient
Definition: Broadcasts.h:79
int NAMD_gcd(int a, int b)
Definition: common.C:103
void exchangeCheckpoint(int scriptTask, int &bpc)
Definition: HomePatch.C:5263
AtomMapper * atomMapper
Definition: Patch.h:159
Bool pairInteractionOn
float * gaussrand_y
Definition: NamdTypes.h:424
Molecule stores the structural information for the system.
Definition: Molecule.h:174
LDObjHandle ldObjHandle
Definition: HomePatch.h:554
void wakeULTs()
double * pos_y
Definition: NamdTypes.h:378
void split(int iStream, int numStreams)
Definition: Random.h:77
static NAMD_HOST_DEVICE Tensor identity(BigReal v1=1.0)
Definition: Tensor.h:31
void addForceToMomentum3(const BigReal timestep1, const int ftag1, const int useSaved1, const BigReal timestep2, const int ftag2, const int useSaved2, const BigReal timestep3, const int ftag3, const int useSaved3)
Definition: Sequencer.C:5641
void positionsReady(int doMigration=0)
Definition: HomePatch.C:1895
Definition: Patch.h:35
Bool useDeviceMigration
float * mass
Definition: NamdTypes.h:405
Flags flags
Definition: Patch.h:128
void submitHalfstep_SOA()
Definition: Sequencer.C:2885
WaterModel watmodel
SimpleBroadcastObject< BigReal > stochRescaleCoefficient
Definition: Broadcasts.h:80
SimpleBroadcastObject< int > monteCarloBarostatAcceptance
Definition: Broadcasts.h:84
double * f_nbond_y
Definition: NamdTypes.h:432
uint32 id
Definition: NamdTypes.h:160
void revert(void)
Definition: HomePatch.C:5232
void submitCollections(int step, int zeroVel=0)
Definition: Sequencer.C:6390
void stochRescaleVelocities_SOA(int step)
Definition: Sequencer.C:3857
static void print_vel_SOA(const double *vel_x, const double *vel_y, const double *vel_z, int ilo=0, int ihip1=1)
Definition: Sequencer.C:107
Charge charge
Definition: NamdTypes.h:79
void runComputeObjects_SOA(int migration, int pairlists, int step)
Definition: Sequencer.C:3671
BigReal calcKineticEnergy()
Definition: Sequencer.C:4954
#define SEQ_STK_SZ
Definition: Thread.h:11
void adaptTempUpdate(int)
Definition: Sequencer.C:5467
double * f_nbond_z
Definition: NamdTypes.h:433
void positionsReady_GPU(int doMigration=0, int startup=0)
Bool langevin_useBAOAB
int32 * hydrogenGroupSize
Definition: NamdTypes.h:385
#define TIMER_START(T, TYPE)
Definition: HomePatch.h:264
#define NAIVE
Definition: SimParameters.h:52
Definition: Output.h:35
int rattle1_SOA(const BigReal, Tensor *virial, SubmitReduction *)
Definition: HomePatch.C:4659
double * f_normal_x
Definition: NamdTypes.h:428
void calcFixVirial(Tensor &fixVirialNormal, Tensor &fixVirialNbond, Tensor &fixVirialSlow, Vector &fixForceNormal, Vector &fixForceNbond, Vector &fixForceSlow)
Definition: Sequencer.C:5963
float * langevinParam
Definition: NamdTypes.h:406
Definition: Random.h:37
#define NAMD_PROFILE_START()
float * gaussrand_x
fill with Gaussian distributed random numbers
Definition: NamdTypes.h:423
static __device__ __host__ __forceinline__ int computeAtomPad(const int numAtoms, const int tilesize=WARPSIZE)
int numPatches(void) const
Definition: PatchMap.h:59
void awaken(void)
Definition: Sequencer.h:55
static std::pair< int, int > coordinateNeeded(int timestep)
Check if the step requires to output the coordinates.
Definition: Output.C:185
#define NAMD_EVENT_START(eon, id)
int pairlistsAge
Definition: Sequencer.h:232
void stochRescaleVelocities(int)
Definition: Sequencer.C:5599
void rattle1_SOA(BigReal, int)
Definition: Sequencer.C:3654
#define COMPUTE_HOME_PRIORITY
Definition: Priorities.h:76
void constructDevicePatchMap()
static void print_tensor(const Tensor &t)
Definition: Sequencer.C:120
NAMD_HOST_DEVICE BigReal length(void) const
Definition: Vector.h:202
int getMasterPe()
Definition: DeviceCUDA.h:137
NAMD_HOST_DEVICE Position apply_transform(Position data, const Transform &t) const
Definition: Lattice.h:137
BigReal rescaleTemp
void NAMD_bug(const char *err_msg)
Definition: common.C:196
float * gaussrand_z
Definition: NamdTypes.h:425
#define TIMER_REPORT(T)
Definition: HomePatch.h:267
void multigratorPressure(int step, int callNumber)
Definition: Sequencer.C:4811
int doEnergy
Definition: PatchTypes.h:20
static ComputeCUDAMgr * getComputeCUDAMgr()
void berendsenPressure(int)
Definition: Sequencer.C:5261
void submitMomentum(int step)
Definition: Sequencer.C:4720
int doFullElectrostatics
Definition: PatchTypes.h:23
BigReal yx
Definition: Tensor.h:18
Bool adaptTempLangevin
int rescaleVelocities_numTemps
Definition: Sequencer.h:277
double * vel_x
Jim recommends double precision velocity.
Definition: NamdTypes.h:396
int32 * id
Definition: NamdTypes.h:390
void submitLoadStats(int timestep)
Definition: HomePatch.C:5428
void mollyMollify(Tensor *virial)
Definition: HomePatch.C:5159
void runComputeObjects(int migration=1, int pairlists=0, int pressureStep=0)
Definition: Sequencer.C:6427
void awaken(void)
Definition: Controller.C:371
void rebalance(Sequencer *seq, PatchID id)
void rescaleaccelMD(int, int, int)
Definition: Sequencer.C:5424
SimpleBroadcastObject< Tensor > velocityRescaleTensor2
Definition: Broadcasts.h:75
float * charge
Definition: NamdTypes.h:381
int Bool
Definition: common.h:142
BigReal drudeBondLen
CompAtomList p
Definition: Patch.h:153
SimpleBroadcastObject< int > IMDTimeEnergyBarrier
Definition: Broadcasts.h:90
Sequencer(HomePatch *p)
Definition: Sequencer.C:171
BigReal langevinTemp
int time_switch
Definition: imd.h:62
void clearDevicePatchMap()
NAMD_HOST_DEVICE BigReal length2(void) const
Definition: Vector.h:206
int ldbSteps
Definition: Sequencer.h:330
int numAtoms
Definition: Patch.h:151
MTSChoices MTSAlgorithm
#define NAMD_EVENT_RANGE_2(eon, id)
void run(void)
Definition: Sequencer.C:269
SimpleBroadcastObject< int > scriptBarrier
Definition: Broadcasts.h:88
uint8 partition
Definition: NamdTypes.h:81
bool getIsGlobalDevice() const
Definition: DeviceCUDA.h:172
BigReal scriptArg1
BigReal x
Definition: Vector.h:74
uint8 hydrogenGroupSize
Definition: NamdTypes.h:89
const_iterator const_begin(void) const
Definition: ResizeArray.h:39
PatchID getPatchID() const
Definition: Patch.h:114
void scalePositionsVelocities(const Tensor &posScale, const Tensor &velScale)
Definition: Sequencer.C:4774
int monteCarloPressureFreq
int getPesSharingDevice(const int i)
Definition: DeviceCUDA.h:139
BigReal adaptTempT
Definition: Sequencer.h:272
int maxForceUsed
Definition: PatchTypes.h:33
SimpleBroadcastObject< BigReal > velocityRescaleFactor2
Definition: Broadcasts.h:76
int sequence
Definition: PatchTypes.h:18
Bool is_atom_rotdragged(int atomnum) const
Definition: Molecule.h:1305
#define D_MSG(t)
Definition: Debug.h:165
int eventEndOfTimeStep
Definition: Node.C:299
void doMigrationGPU(const int startup, const int doGlobal, const int updatePatchMap)
void langevinPiston_SOA(int step)
Definition: Sequencer.C:3525
#define END_OF_RUN
Definition: Output.h:26
void gbisComputeAfterP1()
Definition: HomePatch.C:4915
void integrate_SOA(int)
Definition: Sequencer.C:2068
void traceBarrier(int)
Definition: Sequencer.C:6660
int doNonbonded
Definition: PatchTypes.h:22
void NAMD_die(const char *err_msg)
Definition: common.C:148
PDB * pdb
Definition: Node.h:183
static LdbCoordinator * Object()
BigReal reassignIncr
void gaussian_array_f(float *a, int n)
Definition: Random.h:258
#define TIMER_INIT_WIDTH(T, TYPE, WIDTH)
Definition: HomePatch.h:263
int getForceSendActive() const
Definition: ComputeGlobal.h:46
static int forceNeeded(int timestep)
Check if the step requires to output the forces.
Definition: Output.C:619
int berendsenPressure_count
Definition: Sequencer.h:294
SimpleBroadcastObject< BigReal > velocityRescaleFactor
Definition: Broadcasts.h:71
void publish(int tag, const T &t)
SimpleBroadcastObject< BigReal > minimizeCoefficient
Definition: Broadcasts.h:81
void reassignVelocities(BigReal, int)
Definition: Sequencer.C:5483
Bool LJPMESerialRealSpaceOn
void langevinVelocitiesBBK1_SOA(BigReal timestep)
Definition: Sequencer.C:3297
SimpleBroadcastObject< Vector > accelMDRescaleFactor
Definition: Broadcasts.h:91
int hardWallDrude(const BigReal, Tensor *virial, SubmitReduction *)
Definition: HomePatch.C:3410
ComputeGlobal * computeGlobalObject
Definition: ComputeMgr.h:160
Elem & item(int i)
Definition: ResizeArray.h:119
void saveForce(const int ftag=Results::normal)
Definition: HomePatch.C:2315
Random * random
Definition: Sequencer.h:321
void runComputeObjectsCUDA(int doMigration, int doGlobal, int pairlists, int nstep, int startup)
BigReal xx
Definition: Tensor.h:17
SimpleBroadcastObject< Tensor > positionRescaleFactor
Definition: Broadcasts.h:72
void buildRattleList_SOA()
Definition: HomePatch.C:4520
int getDeviceID()
Definition: DeviceCUDA.h:144
void langevinVelocities(BigReal)
Definition: Sequencer.C:5044
IMDSessionInfo IMDsendsettings
static CollectionMaster * Object()
void hardWallDrude(BigReal, int)
Definition: Sequencer.C:5677
NodeBroadcast * nodeBroadcast
Definition: PatchData.h:141
void checkpoint(void)
Definition: HomePatch.C:5222
BigReal zz
Definition: Tensor.h:19
#define TIMER_STOP(T, TYPE)
Definition: HomePatch.h:265
void suspend(void)
Definition: Sequencer.C:279
void multigratorTemperature(int step, int callNumber)
Definition: Sequencer.C:4974
static constexpr int num_inline_peer
Definition: CudaRecord.h:36
unsigned int randomSeed
double * recipMass
derived from mass
Definition: NamdTypes.h:404
BigReal initialTemp
void reinitVelocities(void)
Definition: Sequencer.C:5515
int pressureProfileAtomTypes
int checkpoint_berendsenPressure_count
Definition: Sequencer.h:295
#define simParams
Definition: Output.C:131
ControllerBroadcasts * broadcast
Definition: Sequencer.h:328
#define NAMD_EVENT_START_EX(eon, id, str)
iterator begin(void)
Definition: ResizeArray.h:36
void maximumMove_SOA(const double dt, const double maxvel2)
Definition: Sequencer.C:3239
double * pos_z
Definition: NamdTypes.h:379
double * f_slow_x
Definition: NamdTypes.h:434
CollectionMgr *const collection
Definition: Sequencer.h:327
void updateDeviceData(const int startup, const int maxForceUsed, const int doGlobal)
const PatchID patchID
Definition: Patch.h:150
#define GB2_COMPUTE_HOME_PRIORITY
Definition: Priorities.h:64
int numHomePatches(void)
Definition: PatchMap.C:432
Definition: Tensor.h:15
BigReal xy
Definition: Tensor.h:17
iterator end(void)
Definition: ResizeArray.h:37
bool rattleListValid_SOA
Definition: HomePatch.h:454
#define NAMD_PROFILE_STOP()
Bool langevinGammasDiffer
double * pos_x
Definition: NamdTypes.h:377
int doVirial
Definition: PatchTypes.h:21
BigReal y
Definition: Vector.h:74
virtual ~Sequencer(void)
Definition: Sequencer.C:245
BigReal movDragGlobVel
int getNumStepsToRun(void)
bool getIsPmeDevice()
Definition: DeviceCUDA.h:168
int doLCPO
Definition: PatchTypes.h:31
void resetMovingAverage()
Definition: Controller.C:656
void newtonianVelocities(BigReal, const BigReal, const BigReal, const BigReal, const int, const int, const int)
Definition: Sequencer.C:5020
static void print_vel_AOS(const FullAtom *a, int ilo=0, int ihip1=1)
Definition: Sequencer.C:95
void rescaleSoluteCharges(BigReal)
Definition: Sequencer.C:5560
void addVelocityToPosition_SOA(const double dt)
Definition: Sequencer.C:2846
double * vel_z
Definition: NamdTypes.h:398
void setVal(const NodeReduction *other)
Definition: ReductionMgr.C:681
#define SOA_SIMPLIFY_PARAMS
Definition: Sequencer.h:31
Mass mass
Definition: NamdTypes.h:218
void submitVelocities(int seq, int zero, FullAtomList &a, int prec)
Bool pressureProfileOn
void submitMinimizeReductions(int, BigReal fmax2)
Definition: Sequencer.C:6241
#define ADD_VECTOR_OBJECT(R, RL, D)
Definition: ReductionMgr.h:28
BigReal yy
Definition: Tensor.h:18
int doMomenta
Definition: Sequencer.h:311
#define TIMER_DONE(T)
Definition: HomePatch.h:266
#define PDBVELFACTOR
Definition: common.h:57
CudaComputeNonbonded * getCudaComputeNonbonded()
#define TIMEFACTOR
Definition: common.h:55
Bool pairInteractionSelf
int multigratorPressureFreq
static int velocityNeeded(int timestep)
Check if the step requires to output the velocities.
Definition: Output.C:510
int numPatchesOnNode(int node)
Definition: PatchMap.h:60
int bufferOffsetNBPad
Definition: CudaRecord.h:39
double * f_nbond_x
Definition: NamdTypes.h:431
int getDeviceIndex()
Definition: DeviceCUDA.h:166
BigReal maximumMove
#define SPECIAL_PATCH_ID
Definition: Sequencer.C:88
void submitPositions(int seq, FullAtomList &a, Lattice l, int prec, int dcdSelectionIndex)
#define cudaCheck(stmt)
Definition: CudaUtils.h:242
void correctMomentum(int step, BigReal drifttime)
Definition: Sequencer.C:4743
bool getIsMasterDevice()
Definition: DeviceCUDA.C:646
int pairlistsAgeLimit
Definition: Sequencer.h:233
NAMD_HOST_DEVICE void outerAdd(BigReal scale, const Vector &v1, const Vector &v2)
Definition: Tensor.h:255
int pairlistsAreValid
Definition: Sequencer.h:231
int doGBIS
Definition: PatchTypes.h:30
int stochRescale_count
Definition: Sequencer.h:290
int doFullDispersion
Definition: PatchTypes.h:24
std::ostream & iERROR(std::ostream &s)
Definition: InfoStream.C:83
int check(int step)
Definition: Sequencer.C:149
ComputeMgr * computeMgr
Definition: Node.h:172
int maxForceMerged
Definition: PatchTypes.h:34
BigReal reassignHold
bool getIsPmeDevice()
Definition: GlobalGPUMgr.C:100
void addForceToMomentum3(FullAtom *__restrict atom_arr, const Force *__restrict force_arr1, const Force *__restrict force_arr2, const Force *__restrict force_arr3, const BigReal dt1, const BigReal dt2, const BigReal dt3, int num_atoms) __attribute__((__noinline__))
Definition: HomePatch.C:3348
void addVelocityToPosition(FullAtom *__restrict atom_arr, const BigReal dt, int num_atoms) __attribute__((__noinline__))
Definition: HomePatch.C:3387
void quenchVelocities()
Definition: Sequencer.C:4711
ForceList f[Results::maxNumForces]
Definition: Patch.h:214
float * langScalRandBBK2
from langevinParam and recipMass
Definition: NamdTypes.h:420
void get_movdrag_params(Vector &v, int atomnum) const
Definition: Molecule.h:1413
static GlobalGPUMgr * Object()
Definition: GlobalGPUMgr.h:61
void enableEarlyExit(void)
Definition: Node.C:1464
void submitReductions(int)
Definition: Sequencer.C:5983
#define namd_reciprocal(x)
Definition: Vector.h:69
SimpleBroadcastObject< Tensor > positionRescaleFactor2
Definition: Broadcasts.h:77
void integrate_CUDA_SOA(int scriptTask)
#define RIGID_NONE
Definition: SimParameters.h:80
void loweAndersenFinish()
Definition: HomePatch.C:4881
uint32 atomFixed
Definition: NamdTypes.h:162
int getNumPesSharingDevice()
Definition: DeviceCUDA.h:138
SimParameters *const simParams
Definition: Sequencer.h:322
SimpleBroadcastObject< Tensor > velocityRescaleTensor
Definition: Broadcasts.h:74
NAMD_HOST_DEVICE Vector unit(void) const
Definition: Vector.h:215
BigReal zx
Definition: Tensor.h:19
CompAtomExtList pExt
Definition: Patch.h:181
int energies_switch
Definition: imd.h:63
__thread DeviceCUDA * deviceCUDA
Definition: DeviceCUDA.C:23
Molecule * molecule
Definition: Node.h:179
NAMD_HOST_DEVICE Vector origin() const
Definition: Lattice.h:278
void rescaleVelocitiesByFactor(BigReal)
Definition: Sequencer.C:5538
double * f_slow_z
Definition: NamdTypes.h:436
int doMolly
Definition: PatchTypes.h:25
int multigratorTemperatureFreq
void reloadCharges()
Definition: Sequencer.C:5548
int doMinimize
Definition: PatchTypes.h:26
#define FORCE_OUTPUT
Definition: Output.h:28
int globalMasterFrequency
double BigReal
Definition: common.h:123
void minimize()
Definition: Sequencer.C:4456
static SynchronousCollectives * Object()
CudaPmeOneDevice * createCudaPmeOneDevice()
int step
Definition: PatchTypes.h:16
#define PATCH_PRIORITY(PID)
Definition: Priorities.h:25
CudaPmeOneDevice * getCudaPmeOneDevice()
void updatePatchOrder(const std::vector< CudaLocalRecord > &data)
for(int i=0;i< n1;++i)
void berendsenPressure_SOA(int step)
Definition: Sequencer.C:3426
int32 numAtoms
number of atoms
Definition: NamdTypes.h:456
void printDevicePatchMap()
BigReal drudeTemp
void compute(const Lattice &lattice, int doEnergyVirial, int step)
void exchangeAtoms(int scriptTask)
Definition: HomePatch.C:5370
T get(int tag, const int expected=-1)
Attempts to retrieve a previously published value for a given tag and id.