1 #ifndef CUDATILELISTKERNEL_HIP_H     2 #define CUDATILELISTKERNEL_HIP_H    53 #ifdef NODEGROUP_FORCE_REGISTER   122   template <
typename T>
   // Constructor: copies the `ptr` and `size` arguments into the same-named
   // members through the initializer list; the body is empty.
   // NOTE(review): the member declarations are not visible in this excerpt, so
   // their exact types (and the unit of `size`) should be confirmed there.
   124     PtrSize(T* ptr, 
int size) : ptr(ptr), size(size) {}
   132   cudaEvent_t tileListStatEvent;
   133   bool tileListStatEventRecord;
   148   int numTileListsGBIS;
   157   size_t cudaPatchesSize;
   160   size_t cudaComputesSize;
   163   const bool doStreaming;
   165   size_t patchNumListsSize;
   168   size_t emptyPatchesSize;
   170   size_t h_emptyPatchesSize;
   173   unsigned int* sortKeySrc;
   174   size_t sortKeySrcSize;
   175   unsigned int* sortKeyDst;
   176   size_t sortKeyDstSize;
   178   int maxTileListLen_sortKeys;
   180   unsigned int* sortKeys;
   184   size_t minmaxListLenSize;
   197   size_t atomStorageSize;
   204   size_t tileLists1Size;
   206   size_t tileLists2Size;
   208   size_t tileListsGBISSize;
   212   size_t patchPairs1Size;
   214   size_t patchPairs2Size;
   217   int* tileJatomStart1;
   218   size_t tileJatomStart1Size;
   219   int* tileJatomStart2;
   220   size_t tileJatomStart2Size;
   221   int* tileJatomStartGBIS;
   222   size_t tileJatomStartGBISSize;
   226   size_t boundingBoxesSize;
   229   unsigned int* tileListDepth1;
   230   size_t tileListDepth1Size;
   231   unsigned int* tileListDepth2;
   232   size_t tileListDepth2Size;
   236   size_t tileListOrder1Size;
   238   size_t tileListOrder2Size;
   242   size_t tileListPosSize;
   254   size_t tileExcls1Size;
   256   size_t tileExcls2Size;
   260   size_t tempStorageSize;
   267   size_t tileListVirialEnergySize;
   269   int tileListVirialEnergyLength;
   270   int tileListVirialEnergyGBISLength;
   274   void setActiveBuffer(
int activeBufferIn) {activeBuffer = activeBufferIn;}
   277     const bool useJtiles,
   278     const int begin_bit, 
const bool highDepthBitsSet,
   280     const int numTileListsSrc, 
const int numJtilesSrc,
   281     PtrSize<TileList> tileListsSrc, PtrSize<int> tileJatomStartSrc,
   282     PtrSize<unsigned int> tileListDepthSrc, PtrSize<int> tileListOrderSrc,
   283     PtrSize<PatchPairRecord> patchPairsSrc, PtrSize<TileExcl> tileExclsSrc,
   285     const int numTileListsDst, 
const int numJtilesDst,
   286     PtrSize<TileList> tileListsDst, PtrSize<int> tileJatomStartDst,
   287     PtrSize<unsigned int> tileListDepthDst, PtrSize<int> tileListOrderDst,
   288     PtrSize<PatchPairRecord> patchPairsDst, PtrSize<TileExcl> tileExclsDst,
   289     cudaStream_t stream);
   // Declarations of the write*() helpers; their bodies are not part of this
   // header excerpt. Each takes an element count, a pointer named with a `d_`
   // prefix and a stream. writeTileList and writeTileJatomStart come in two
   // overloads: one taking a file name and one taking an already-open FILE*.
   // writeTileExcls is only declared in the FILE* form here.
   // NOTE(review): the `d_` prefix suggests device pointers and the names
   // suggest dump-to-file helpers, presumably for debugging — confirm against
   // the definitions.
   291   void writeTileList(
const char* filename, 
const int numTileLists,
   292     const TileList* d_tileLists, cudaStream_t stream);
   293   void writeTileList(FILE* handle, 
const int numTileLists,
   294     const TileList* d_tileLists, cudaStream_t stream);
   295   void writeTileJatomStart(
const char* filename, 
const int numJtiles,
   296     const int* d_tileJatomStart, cudaStream_t stream);
   297   void writeTileJatomStart(FILE* handle, 
const int numJtiles,
   298     const int* d_tileJatomStart, cudaStream_t stream);
   299   void writeTileExcls(FILE* handle, 
const int numJtiles,
   300     const TileExcl* d_tileExcl, cudaStream_t stream);
   305   size_t outputOrderSize;
   330   int* 
getTileJatomStart() {
return ((activeBuffer == 1) ? tileJatomStart1 : tileJatomStart2);}
   332     return ((activeBuffer == 1) ? tileLists1 : tileLists2);
   334   unsigned int* 
getTileListDepth() {
return ((activeBuffer == 1) ? tileListDepth1 : tileListDepth2);}
   335   int* 
getTileListOrder() {
return ((activeBuffer == 1) ? tileListOrder1 : tileListOrder2);}
   354     int atomStorageSizeIn, 
int numPatchesIn,
   356     cudaStream_t stream);
   359     const int numPatchesIn, 
const int atomStorageSizeIn, 
const int maxTileListLenIn,
   360     const float3 lata, 
const float3 latb, 
const float3 latc,
   361     const CudaPatchRecord* h_cudaPatches, 
const float4* h_xyzq, 
const float plcutoff2In,
   362     const size_t maxShmemPerBlock, cudaStream_t stream, 
const bool atomsChanged, 
   363     const bool allocatePart, 
bool CUDASOAintegratorOn, 
bool deviceMigration);
   // Declaration only; the body is not part of this header excerpt.
   // Takes two boolean flags (doGBIS, CUDASOAIntegratorOn) and the stream.
   // NOTE(review): presumably re-sorts the current tile lists, with the work
   // issued on `stream` — confirm against the definition.
   365 void reSortTileLists(
const bool doGBIS, 
const bool CUDASOAIntegratorOn, cudaStream_t stream);  
   376     if (!doStreaming) 
return NULL;
   386 #endif // CUDATILELISTKERNEL_HIP_H 
CudaTileListKernel(int deviceID, bool doStreaming)
 
void prepareTileList(cudaStream_t stream)
 
void setTileListVirialEnergyLength(int len)
 
PatchPairRecord * getPatchPairs()
 
void clearTileListStat(cudaStream_t stream)
 
TileExcl * getTileExcls()
 
void setTileListVirialEnergyGBISLength(int len)
 
void prepareBuffers(int atomStorageSizeIn, int numPatchesIn, const CudaPatchRecord *h_cudaPatches, cudaStream_t stream)
 
int getTileListVirialEnergyGBISLength()
 
CudaPatchRecord * getCudaPatches()
 
unsigned int * getTileListDepth()
 
BoundingBox * getBoundingBoxes()
 
void updateComputes(const int numComputesIn, const CudaComputeRecord *h_cudaComputes, cudaStream_t stream)
 
TileList * getTileListsGBIS()
 
TileListStat * getTileListStatDevPtr()
 
TileList * getTileLists()
 
int getNumTileListsGBIS()
 
void finishTileList(cudaStream_t stream)
 
int getTileListVirialEnergyLength()
 
int * getTileJatomStart()
 
int * getTileJatomStartGBIS()
 
TileListVirialEnergy * getTileListVirialEnergy()
 
void buildTileLists(const int numTileListsPrev, const int numPatchesIn, const int atomStorageSizeIn, const int maxTileListLenIn, const float3 lata, const float3 latb, const float3 latc, const CudaPatchRecord *h_cudaPatches, const float4 *h_xyzq, const float plcutoff2In, const size_t maxShmemPerBlock, cudaStream_t stream, const bool atomsChanged, const bool allocatePart, bool CUDASOAintegratorOn, bool deviceMigration)
 
void reSortTileLists(const bool doGBIS, cudaStream_t stream)