#ifndef SEQUENCERCUDA_H
#define SEQUENCERCUDA_H

#include <hiprand/hiprand.h>

#ifdef NODEGROUP_FORCE_REGISTER

  inline static SequencerCUDA *Object() {
    return CkpvAccess(SequencerCUDA_instance);
  }
  inline static SequencerCUDA *ObjectOnPe(int pe) {
    return CkpvAccessOther(SequencerCUDA_instance, CmiRankOf(pe));
  }
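  // Bookkeeping for suspending and resuming the Charm++ threads that feed
  // patches to this sequencer.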
  int numPatchesCheckedIn;
  std::vector<CthThread> waitingThreads;
  bool masterThreadSleeping = false;
  bool breakSuspends = false;
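  // Device-buffer lifecycle and host<->device transfer entry points;
  // reallocateArrays() and copyPatchData() presumably report whether a
  // reallocation or copy actually took place.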
  bool reallocateArrays(int in_numAtomsHome, int in_numAtomsHomeAndProxy);
  void reallocateMigrationDestination();
  void deallocateArrays();
  void deallocateStaticArrays();
  void copyAoSDataToHost();
  void copyPatchDataToHost();
  void copyAtomDataToDeviceAoS();
  void copyAtomDataToDevice(bool copyForces, int maxForceNumber);
  bool copyPatchData(const bool copyIn, const bool startup);
  void copyDataToPeers(const bool copyIn);
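  // Atom-migration pipeline: after patches exchange atoms, these rebuild the
  // per-device atom counts, offsets, and destination maps and re-sort atoms
  // for the nonbonded kernels.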
  void migrationLocalInit();
  void migrationPerform();
  void migrationLocalPost(int startup);
  void migrationUpdateAdvancedFeatures(const int startup);
  void migrationUpdateAtomCounts();
  void migrationUpdateAtomOffsets();
  void migrationUpdateRemoteOffsets();
  void migrationUpdateProxyDestination();
  void migrationUpdateDestination();
  void migrationSortAtomsNonbonded();
  void copyMigrationInfo(HomePatch *p, int patchIndex);
  void assembleOrderedPatchList();
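  // Monte Carlo barostat support: finish the trial volume move, then either
  // accept or reject it.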
  void monteCarloPressure_part2(int step, int maxForceNumber,
      const bool doEnergy, const bool doGlobal, const bool doVirial);
  void monteCarloPressure_reject(Lattice &lattice);
  void monteCarloPressure_accept(const int doMigration);
  inline static void tensor_enforce_symmetry(Tensor& t) { /* ... */ }
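  // Per-timestep integration phases launched on the sequencer streams. The
  // declarations below are partial: the launch_part* names are assumed from
  // this class's naming convention, and /* ... */ marks parameters that were
  // elided.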
  void launch_part1(
      /* ... */
      const double maxvel2,
      /* ... */
      int reassignVelocitiesStep,
      int langevinPistonStep,
      int berendsenPressureStep,
      /* ... */
      const int savePairlists,
      const int usePairlists,
      const bool doEnergy);
  void launch_part11(
      /* ... */
      const double maxvel2,
      /* ... */
      int langevinPistonStep,
      /* ... */
      const int savePairlists,
      const int usePairlists,
      const bool doEnergy);
  void launch_set_compute_positions();
  void launch_part2(
      const int doMCPressure,
      /* ... */
      const int langevinPistonStep,
      /* ... */
      const bool doEnergy);
  void launch_part3(
      const int doMCPressure,
      /* ... */
      const bool requestGlobalForces,
      const int doGlobalStaleForces,
      const bool forceRequestedGPU,
      /* ... */
      const bool requestForcesOutput);
  void copyGlobalForcesToDevice();
  void copySettleParameter();
  void finish_part1(const int copyIn,
      const int savePairlists,
      const int usePairlists);
  void update_patch_flags();
  void finish_patch_flags(int isMigration);
  void updatePairlistFlags(const int doMigration);
  void updateDeviceKernels();
  void setRescalePairlistTolerance(const bool val);
  void allocateGPUSavedForces();
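  // CUDA streams/events and device-side state. d_-prefixed pointers live in
  // GPU memory; _x/_y/_z triples are SOA (structure-of-arrays) components.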
  cudaStream_t stream, stream2;
  cudaEvent_t stream2CopyDone, stream2CopyAfter;
  std::vector<AtomMap*> atomMapList;
  int *d_migrationGroupSize;
  int *d_migrationGroupIndex;
  double *d_fixedPosition_x, *d_fixedPosition_y, *d_fixedPosition_z;
  double *d_f_saved_nbond_x, *d_f_saved_nbond_y, *d_f_saved_nbond_z;
  double *d_f_saved_slow_x, *d_f_saved_slow_y, *d_f_saved_slow_z;
  double *d_posNew_raw;
  double *d_posNew_x, *d_posNew_y, *d_posNew_z;
  double *d_f_global_x, *d_f_global_y, *d_f_global_z;
  double *d_rcm_x, *d_rcm_y, *d_rcm_z;
  double *d_vcm_x, *d_vcm_y, *d_vcm_z;
  double *d_f_normalMC_x, *d_f_normalMC_y, *d_f_normalMC_z;
  double *d_f_nbondMC_x, *d_f_nbondMC_y, *d_f_nbondMC_z;
  double *d_f_slowMC_x, *d_f_slowMC_y, *d_f_slowMC_z;
  double *d_posMC_x, *d_posMC_y, *d_posMC_z;
  int *d_moleculeStartIndex;
  double *d_velNew_x, *d_velNew_y, *d_velNew_z;
  double *d_posSave_x, *d_posSave_y, *d_posSave_z;
  int *d_patchOffsetTemp;
  float *d_rigidBondLength;
  float *d_langevinParam;
  float *d_langScalVelBBK2;
  float *d_langScalRandBBK2;
  float *d_gaussrand_x, *d_gaussrand_y, *d_gaussrand_z;
  int *d_hydrogenGroupSize;
  size_t d_consFailureSize;
  size_t settleListSize;
  size_t rattleListSize;
  int* d_globalToLocalID;
  int* d_patchToDeviceMap;
  double3* d_awayDists;
  double* d_patchMaxAtomMovement;
  double* d_patchNewTolerance;
  unsigned int* d_tbcatomic;
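  // Host-side mirrors of the SOA force, velocity, and position arrays, plus
  // per-patch pairlist and tolerance bookkeeping.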
  double *f_global_x, *f_global_y, *f_global_z;
  double *f_normal_x, *f_normal_y, *f_normal_z;
  double *f_nbond_x, *f_nbond_y, *f_nbond_z;
  double *f_slow_x, *f_slow_y, *f_slow_z;
  double *vel_x, *vel_y, *vel_z;
  double *pos_x, *pos_y, *pos_z;
  float *langevinParam;
  float *langScalVelBBK2;
  float *langScalRandBBK2;
  int *hydrogenGroupSize;
  float *rigidBondLength;
  double* fixedPosition_x;
  double* fixedPosition_y;
  double* fixedPosition_z;
  int* globalToLocalID;
  int* patchToDeviceMap;
  double3* patchCenter;
  double* patchMaxAtomMovement;
  double* patchNewTolerance;
  int* computeNbondPosition;
  double pairlist_newTolerance;
  BigReal* intKineticEnergy_half;
  unsigned int *h_marginViolations;
  unsigned int *h_periodicCellSmall;
  unsigned int totalMarginViolations;
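  // Rigid-bond list flag, fixed-atom force accumulators (one per force
  // level), and per-device atom/patch counters.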
  bool buildRigidLists;
  double3 *d_fixForceNormal;
  double3 *d_fixForceNbond;
  double3 *d_fixForceSlow;
  int numAtomsHomePrev;
  int numAtomsHomeAllocated;
  int numAtomsHomeAndProxy;
  int numAtomsHomeAndProxyAllocated;
  int numPatchesGlobal;
  int numPatchesHomeAndProxy;
  int marginViolations;
  bool rescalePairlistTolerance;
  int nSettle, nRattle;
  CmiNodeLock printlock;
  cudaEvent_t eventStart, eventStop;
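  // Per-phase GPU timing accumulators, presumably measured with the
  // eventStart/eventStop pair above.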
  float t_pairlistCheck;
  float t_setComputePositions;
  float t_accumulateForceKick;
  float t_submitReductions1;
  float t_submitReductions2;
  std::vector<HomePatch*> patchList;
  std::vector<HomePatch*> patchListHomeAndProxy;
  unsigned long long int d_ullmaxtol;
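  // Kernel wrappers and helper compute objects owned by this sequencer.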
  SequencerCUDAKernel *CUDASequencerKernel;
  MigrationCUDAKernel *CUDAMigrationKernel;
  ComputeRestraintsCUDA *restraintsKernel;
  ComputeSMDCUDA *SMDKernel;
  ComputeGroupRestraintsCUDA *groupRestraintsKernel;
  ComputeGridForceCUDA *gridForceKernel;
  curandGenerator_t curandGen;
  ComputeConsForceCUDA *consForceKernel;
  size_t num_used_grids;
  std::vector<int> used_grids;
  unsigned int* deviceQueue;
  bool** h_patchRecordHasForces;
  bool** d_patchRecordHasForces;
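  // Velocity sanity checks and energy/virial reduction submission.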
  // Names below are assumed; the starts of these declarations were elided.
  void maximumMove(
      const double maxvel2,
      /* ... */);
  void submitHalf(
      /* ... */
      int numAtoms, int part,
      const bool doEnergy);
  void submitReductions(
      /* ... */
      int marginViolations,
      /* ... */);
  void submitReductionValues();
  void copyPositionsAndVelocitiesToHost(bool copyOut, const int doGlobal);
  void copyPositionsToHost();
  void startRun1(int maxForceNumber, const Lattice& lat);
  void startRun2(  // name assumed; the start of this declaration was elided
      /* ... */
      const bool requestGlobalForces,
      int doGlobalMasterStateForces,
      const bool requestForcesOutput,
      const bool requestGlobalForcesGPU,
      /* ... */);
  void redistributeTip4pForces(
      const int maxForceNumber,
      /* ... */);
  void printSOAForces(char *);
  void printSOAPositionsAndVelocities();
  void registerSOAPointersToHost();
  void copySOAHostRegisterToDevice();
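  // External force terms (restraints, SMD, grid forces via the kernels
  // above) and PME atom updates.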
  void calculateExternalForces(
      /* ... */
      const int maxForceNumber,
      /* ... */);
  void atomUpdatePme();
  void updateHostPatchDataSOA();
  void saveForceCUDASOA_direct(
      /* ... */
      const bool doForcesOutput,
      const int maxForceNumber);
  void copyPositionsToHost_direct();
  int getNumPatchesHome() { return numPatchesHome; }
  double3* getHostPatchMin() { return patchMin; }
  double3* getHostPatchMax() { return patchMax; }
  double3* getHostAwayDists() { return awayDists; }
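  // Example usage from node-group code (a sketch; assumes a valid PE id):
  //   SequencerCUDA *seq = SequencerCUDA::ObjectOnPe(pe);
  //   double3 *pmin = seq->getHostPatchMin();
  //   double3 *pmax = seq->getHostPatchMax();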
  friend class SequencerCUDA;

  static void partition(int *order, const FullAtom *atoms, int begin, int end);

/**
 * A class for copying atom information from SequencerCUDA to
 * CudaGlobalMasterClient.
 */

#endif // NODEGROUP_FORCE_REGISTER

#endif // SEQUENCERCUDA_H