#ifndef COMPUTEBONDEDCUDA_H
#define COMPUTEBONDEDCUDA_H

#if defined(NAMD_CUDA) || defined(NAMD_HIP)
#ifdef BONDED_CUDA

#include <array>
#include <atomic>
#include <cstdint>
#include <list>
#include <vector>

class ComputeBondedCUDA : public Compute {

public:
  // Per-type sizes of the device tuple structs and of their staged
  // (migration) counterparts
  static const int CudaTupleTypeSize[Tuples::NUM_TUPLE_TYPES];
  static const int CudaTupleTypeSizeStage[Tuples::NUM_TUPLE_TYPES];
private:
  // True after initialize() has been called
  bool initializeCalled;
#ifdef NODEGROUP_FORCE_REGISTER
  // Atomic index used to hand out tuple copy work among PEs on the node
  std::atomic<int> tupleWorkIndex;
#endif // NODEGROUP_FORCE_REGISTER
  // All patch IDs on this node
  std::vector<int> allPatchIDs;

  // Patch IDs assigned to each rank on this node
  std::vector< std::vector<int> > patchIDsPerRank;
  // Self compute: a compute whose tuples live entirely within its own patches
  struct SelfCompute {
    int type;
    std::vector<int> patchIDs;
    Tuples* tuples;
    SelfCompute(int type=-1) : type(type), tuples(NULL) {}
    int operator==(const SelfCompute &elem) const {
      return (elem.type == type);
    }
  };
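  // Usage sketch (an assumption, not shown in this header): since equality
  // compares only the compute type, a self compute of a given type can be
  // located in a plain vector with std::find, e.g.
  //   auto it = std::find(selfComputes.begin(), selfComputes.end(), SelfCompute(type));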
  // Home compute: tuples spanning the home patches owned by a PE
  struct HomeCompute {
    std::vector<char> isBasePatch;
    std::vector<int> patchIDs;
    // Multiple tuple lists per PE, each of a different type
    std::vector< Tuples* > tuples;
  };
  // Computes registered by a single PE
  struct ComputeRecord {
    HomeCompute homeCompute;
    // Self computes, one entry per compute type
    std::vector< SelfCompute > selfComputes;
  };
  // Compute records, one per PE on this node
  std::vector< ComputeRecord > computes;
  // All tuple lists on this node, grouped by tuple type
  std::array< std::list<Tuples*>, Tuples::NUM_TUPLE_TYPES > tupleList;

  // Total number of tuples of each type
  int numTuplesPerType[Tuples::NUM_TUPLE_TYPES];
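  // Likely relationship between the two members above (an inference from the
  // names): the per-type lists are walked once to accumulate totals,
  //   for (Tuples* t : tupleList[type]) numTuplesPerType[type] += /* tuple count of t */;
  // so that the per-type staging buffers can be sized before copying begins.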
  // Atom mappers, one per rank on this node
  std::vector< AtomMapper* > atomMappers;
  // Records for all patches this compute uses
  std::vector<PatchRecord> patches;

  // Maps a global patch ID to its index in patches
  std::vector<int> patchIndex;
  // Multiplicity maps for dihedrals and impropers
  std::vector<int> dihedralMultMap;
  std::vector<int> improperMultMap;
  // Per-rank exclusion counts, split into modified and unmodified exclusions
  struct NumExcl {
    int numModifiedExclusions;
    int numExclusions;
  };
  std::vector<NumExcl> numExclPerRank;
  bool hasModifiedExclusions;

  // Total size of the tuple data buffer
  size_t tupleDataSize;
  // Host-side staging buffers, one per tuple type
  std::vector<CudaBondStage> bondTupleData;
  std::vector<CudaAngleStage> angleTupleData;
  std::vector<CudaDihedralStage> dihedralTupleData;
  std::vector<CudaDihedralStage> improperTupleData;
  std::vector<CudaExclusionStage> modifiedExclusionTupleData;
  std::vector<CudaExclusionStage> exclusionTupleData;
  std::vector<CudaCrosstermStage> crosstermTupleData;
  std::vector<CudaTholeStage> tholeTupleData;
  std::vector<CudaAnisoStage> anisoTupleData;
  std::vector<CudaOneFourNbTholeStage> oneFourNbTholeTupleData;
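  // The *Stage vectors above appear to form the host-side staging area for
  // the GPU-resident migration path (migrationKernel below); one vector per
  // tuple type, with element sizes tabulated in CudaTupleTypeSizeStage.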
#ifdef NODEGROUP_FORCE_REGISTER
  // Kernel performing GPU-resident tuple migration
  MigrationBondedCUDAKernel migrationKernel;
#endif // NODEGROUP_FORCE_REGISTER

  // Patch map centers on host and device
  double3* h_patchMapCenter;
  double3* d_patchMapCenter;
  // CUDA event recorded when the bonded force computation finishes
  cudaEvent_t forceDoneEvent;

  CmiNodeLock printLock;

  // Wall time recorded before the force computation starts
  double beforeForceCompute;

  // Host buffer of accumulated energies and virials
  double* energies_virials;

  // Alchemical partition switch lookup table (3x3)
  int pswitchTable[3*3];
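  // Sketch of the presumed indexing (an assumption based on the 3*3 size):
  // the switch for a tuple whose two endpoint atoms sit in alchemical
  // partitions p1 and p2 (each 0, 1, or 2) would be fetched as
  //   int pswitch = pswitchTable[p1 + 3*p2];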
  void updatePatches();

  // Polling callback and its registration, used to detect completion of the
  // force computation on the device
  static void forceDoneCheck(void *arg, double walltime);
  void forceDoneSetCallback();
  // One unit of tuple copy work: a run of ntuples tuples of a single type,
  // written starting at tupleDataPos in the tuple data buffer
  struct TupleCopyWork {
    int tupletype;
    int ntuples;
    void* tupleElemList;
    int64_t tupleDataPos;
  };

  std::vector<TupleCopyWork> tupleCopyWorkList;
  // Start positions of the two exclusion ranges in the tuple data buffer
  int64_t exclusionStartPos;
  int64_t exclusionStartPos2;

  std::vector<CudaBondStage> hostCudaBondStage;
#ifdef NODEGROUP_FORCE_REGISTER
  template <typename T>
  void sortTupleList(std::vector<T>& tuples, std::vector<int>& tupleCounts, std::vector<int>& tupleOffsets);
  void sortAndCopyToDevice();
  void migrateTuples(bool startup);
#endif // NODEGROUP_FORCE_REGISTER
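  // The sortTupleList parameters suggest a counting-sort layout (an
  // assumption): tupleCounts would hold per-bucket tuple counts and
  // tupleOffsets their exclusive prefix sums, giving each bucket a contiguous
  // destination range in the sorted vector.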
  // Copies a single tuple of element type T, with its parameter record from
  // p_array, into the staged representation dstval
  template <typename T, typename P, typename D>
  void copyTupleToStage(const T& src, const P* __restrict__ p_array, D& dstval);
  template <typename T, typename P, typename D>
  void copyToStage(const int ntuples, const T* __restrict__ src,
    const P* __restrict__ p_array, std::vector<D>& dst);
  void copyExclusionDataStage(const int ntuples, const ExclElem* __restrict__ src, const int typeSize,
    std::vector<CudaExclusionStage>& dst1, std::vector<CudaExclusionStage>& dst2, int64_t& pos, int64_t& pos2);
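  // dst1/dst2 and pos/pos2 above plausibly correspond to the modified and
  // unmodified exclusion streams (modifiedExclusionTupleData and
  // exclusionTupleData), mirroring exclusionStartPos and exclusionStartPos2.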
  void copyBondData(const int ntuples, const BondElem* __restrict__ src,
    const BondValue* __restrict__ bond_array, CudaBond* __restrict__ dst);
  void copyBondDatafp32(const int ntuples, const BondElem* __restrict__ src,
    const BondValue* __restrict__ bond_array, CudaBondStage* __restrict__ dst);
  void copyAngleData(const int ntuples, const AngleElem* __restrict__ src,
    const AngleValue* __restrict__ angle_array, CudaAngle* __restrict__ dst);
  template <bool doDihedral, typename T, typename P>
  void copyDihedralData(const int ntuples, const T* __restrict__ src,
    const P* __restrict__ p_array, CudaDihedral* __restrict__ dst);
  template <bool doDihedral, typename T, typename P>
  void copyDihedralDatafp32(const int ntuples, const T* __restrict__ src,
    const P* __restrict__ p_array, CudaDihedral* __restrict__ dst);
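  // The doDihedral template flag lets one implementation serve both dihedrals
  // and impropers as a compile-time choice, avoiding a per-tuple runtime
  // branch (an inference from the template parameter, matching the separate
  // dihedralMultMap and improperMultMap members above).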
  void copyExclusionData(const int ntuples, const ExclElem* __restrict__ src, const int typeSize,
    CudaExclusion* __restrict__ dst1, CudaExclusion* __restrict__ dst2, int64_t& pos, int64_t& pos2);
  void copyCrosstermData(const int ntuples, const CrosstermElem* __restrict__ src,
    const CrosstermValue* __restrict__ crossterm_array, CudaCrossterm* __restrict__ dst);
  void copyTholeData(const int ntuples, const TholeElem* __restrict__ src,
    const TholeValue* __restrict__ thole_array, CudaThole* __restrict__ dst);
  void copyAnisoData(const int ntuples, const AnisoElem* __restrict__ src,
    const AnisoValue* __restrict__ aniso_array, CudaAniso* __restrict__ dst);
  void copyOneFourNbTholeData(const int ntuples, const OneFourNbTholeElem* __restrict__ src,
    const OneFourNbTholeValue* __restrict__ nbthole_array, CudaOneFourNbThole* __restrict__ dst);
  static void tupleCopyWorker(int first, int last, void *result, int paraNum, void *param);
  void tupleCopyWorker(int first, int last);
  static void tupleCopyWorkerExcl(int first, int last, void *result, int paraNum, void *param);
  void tupleCopyWorkerExcl(int first, int last);
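  // The five-argument static overloads above match the Charm++ CkLoop worker
  // signature (an index range plus result/param plumbing); presumably each
  // static wrapper recovers the ComputeBondedCUDA instance from param and
  // forwards to the corresponding two-argument member function. That
  // forwarding is an assumption here.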
#ifdef NODEGROUP_FORCE_REGISTER
  // Copies all tuples of a single type (GPU-resident code path)
  void tupleCopyWorkerType(int tupletype);
#endif // NODEGROUP_FORCE_REGISTER
public:
  ~ComputeBondedCUDA();
  void registerCompute(int pe, int type, PatchIDList& pids);
  void registerSelfCompute(int pe, int type, int pid);
  void unregisterBoxesOnPe();
  void assignPatchesOnPe();
  virtual void initialize();
  virtual void atomUpdate();
  virtual void patchReady(PatchID, int doneMigration, int seq);
  void messageEnqueueWork();
  void openBoxesOnPe(int startup = 1);
  void loadTuplesOnPe(const int startup = 1);
  void copyTupleData();
  void copyTupleDataSN();
  void updateCudaAlchParameters();

  void updateHostCudaAlchFlags();
  void updateKernelCudaAlchFlags();
  void updateHostCudaAlchParameters();
  void updateKernelCudaAlchParameters();
  void updateHostCudaAlchLambdas();
  void updateKernelCudaAlchLambdas();
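  // Naming convention above (an inference, not stated in this header): the
  // updateHostCuda* methods refresh the host-side copies of the alchemical
  // flags, parameters, and lambdas from the simulation state, while the
  // matching updateKernelCuda* methods push those copies to the device kernel.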
#ifdef NODEGROUP_FORCE_REGISTER
  void updatePatchRecords();

  void registerPointersToHost();
  void copyHostRegisterToDevice();
  void copyPatchData();
  void copyTupleDataGPU(const int startup);
  void updatePatchOrder(const std::vector<CudaLocalRecord>& data);
#endif // NODEGROUP_FORCE_REGISTER

  void finishPatchesOnPe();
  void finishPatches();
  void finishReductions();
  std::vector<int>& getBondedPes(void) { return pes; }
  std::vector<PatchRecord>& getPatches() { return patches; }
};

#endif // BONDED_CUDA
#endif // NAMD_CUDA || NAMD_HIP
#endif // COMPUTEBONDEDCUDA_H