12 #define MIN_DEBUG_LEVEL 3 17 #if defined(NAMD_CUDA) || defined(NAMD_HIP) 19 #define __thread __declspec(thread) 22 #endif // defined(NAMD_CUDA) || defined(NAMD_HIP) 25 #ifdef NODEGROUP_FORCE_REGISTER 27 ComputeSMDCUDA::ComputeSMDCUDA(
28 std::vector<HomePatch*> &patchList,
29 double springConstant,
30 double transverseSpringConstant,
43 this->patchList = &patchList;
44 this->springConstant = springConstant;
45 this->transverseSpringConstant = transverseSpringConstant;
46 this->velocity = velocity;
47 this->direction = direction;
48 this->outputFrequency = outputFrequency;
49 this->firstTimeStep = firstTimeStep;
50 this->filename = filename;
51 this->isMasterDevice = isMasterDevice;
52 this->numAtoms = numAtoms;
53 this->mGpuOn =_mGpuOn;
54 this->numDevices = numDevices;
55 this->deviceIndex = deviceIndex;
58 smdAtomsGlobalIndex.clear();
60 allocate_host<double3>(&curCOM, 1);
63 smdAtomsSOAIndex.resize(this->numSMDAtoms);
64 allocate_device<unsigned int>(&d_tbcatomic, 1);
65 allocate_device<double3>(&d_curCOM, 1);
66 allocate_device<int>(&d_smdAtomsSOAIndex, this->numSMDAtoms);
70 allocate_device<double3>(&d_peerCOM,
sizeof(double3));
72 allocate_host<double3>(&h_peerCOM,
sizeof(double3));
81 copy_HtoD<double3>(curCOM, d_curCOM, 1);
82 cudaCheck(cudaMemset(d_tbcatomic, 0,
sizeof(
unsigned int)));
86 ComputeSMDCUDA::~ComputeSMDCUDA(){
88 deallocate_host<double3>(&curCOM);
89 deallocate_device<unsigned int>(&d_tbcatomic);
90 deallocate_device<int>(&d_smdAtomsSOAIndex);
94 void ComputeSMDCUDA::parseAtoms(){
97 origCOM.x = origCOM.y = origCOM.z = 0;
99 int numPDBAtoms = smdpdb.num_atoms();
100 if(numPDBAtoms < 1 )
NAMD_die(
"No Atoms found in SMDFile\n");
104 if (numPDBAtoms != this->numAtoms){
105 fprintf(stderr,
"Error, wrong numPDB (%d vs %d)\n",numPDBAtoms, this->numAtoms);
106 NAMD_die(
"The number of atoms in SMDFile must be equal to the total number of atoms in the structure!\n");
110 for(
int i = 0; i < numPDBAtoms; i++){
112 PDBAtom *atom = smdpdb.atom(i);
114 smdAtomsGlobalIndex.push_back(i);
118 origCOM.x += atom->
xcoor()*mass;
119 origCOM.y += atom->
ycoor()*mass;
120 origCOM.z += atom->
zcoor()*mass;
125 inv_group_mass = 1.0 / imass;
126 origCOM.x *= inv_group_mass;
127 origCOM.y *= inv_group_mass;
128 origCOM.z *= inv_group_mass;
131 NAMD_die(
"SMDFile contained no SMD atoms (atoms w/ nonzero occupancy)\n");
133 this->numSMDAtoms = smdAtomsGlobalIndex.size();
136 void ComputeSMDCUDA::updateAtoms(
137 std::vector<AtomMap*> &atomMapsList,
138 std::vector<CudaLocalRecord> &localRecords,
139 const int* h_globalToLocalID) {
140 DebugM(3,
"[" << CkMyPe() <<
"]" <<
" updateAtoms full "<< smdAtomsGlobalIndex.size()<<
" SOA "<< smdAtomsSOAIndex.size()<<
"\n" <<
endi);
141 smdAtomsSOAIndex.clear();
143 smdAtomsSOAtoGlobalLocalMap.clear();
145 for(
int i = 0 ; i < this->numSMDAtoms; i++){
146 int gid = smdAtomsGlobalIndex[i];
149 for(
int j = 0 ; j < atomMapsList.size(); j++){
150 lid = atomMapsList[j]->localID(gid);
151 if( lid.
pid != -1)
break;
158 NAMD_bug(
" LocalAtomID not found in patchMap");
163 int soaPid = h_globalToLocalID[lid.
pid];
164 int soaIndex = localRecords[soaPid].bufferOffset + lid.
index;
165 smdAtomsSOAIndex.push_back(soaIndex);
168 smdAtomsSOAtoGlobalLocalMap[soaIndex]=gid;
172 int numLocalSMDAtoms=smdAtomsSOAIndex.size();
173 DebugM(3,
"[" << CkMyPe() <<
"]" <<
" updateAtoms " << numLocalSMDAtoms <<
"\n" <<
endi);
174 if(numLocalSMDAtoms>0)
180 std::sort(smdAtomsSOAIndex.begin(), smdAtomsSOAIndex.end());
181 copy_HtoD<int>(smdAtomsSOAIndex.data(), d_smdAtomsSOAIndex, numLocalSMDAtoms);
184 void ComputeSMDCUDA::computeCOMMGpu(
186 const float * d_mass,
187 const double* d_pos_x,
188 const double* d_pos_y,
189 const double* d_pos_z,
190 const char3* d_transform,
193 DebugM(3,
"[" << CkMyPe() <<
"]" <<
" computeCOMMGpu " << this->smdAtomsSOAIndex.size() <<
" 1st "<< this->smdAtomsSOAIndex[0]<<
"\n" <<
endi);
195 computeCOMSMDMgpu(this->smdAtomsSOAIndex.size(),lat,
196 d_mass, d_pos_x, d_pos_y, d_pos_z,
197 d_transform, this->d_smdAtomsSOAIndex,
198 d_peerCOM, cudaMgr->curSMDCOM,
199 this->d_tbcatomic, numDevices, deviceIndex, stream);
201 cudaCheck(cudaStreamSynchronize(stream));
202 copy_DtoH_sync<double3>(d_peerCOM, h_peerCOM, 1);
203 DebugM(3,
"deviceIndex "<< deviceIndex <<
" COM " <<h_peerCOM[0].x*inv_group_mass<<
", " 204 <<h_peerCOM[0].y*inv_group_mass<<
", " 205 <<h_peerCOM[0].z*inv_group_mass<<
"\n"<<
endi);
210 void ComputeSMDCUDA::doForce(
215 const double* d_pos_x,
216 const double* d_pos_y,
217 const double* d_pos_z,
218 const char3* d_transform,
219 double* d_f_normal_x,
220 double* d_f_normal_y,
221 double* d_f_normal_z,
229 DebugM(3,
"[" << CkMyPe() <<
"]" <<
" doForce " << this->smdAtomsSOAIndex.size() <<
"\n" <<
endi);
232 bool doOutput = ((timeStep % this->outputFrequency) == 0);
235 doOutput&=cudaMgr->reducerSMDDevice == deviceIndex;
239 cudaCheck(cudaStreamSynchronize(stream));
240 copy_DtoH_sync<double3>(d_peerCOM, h_peerCOM, 1);
241 DebugM(3,
" deviceIndex "<< deviceIndex <<
" com " <<h_peerCOM[0].x*inv_group_mass<<
", " 242 <<h_peerCOM[0].y*inv_group_mass<<
", " 243 <<h_peerCOM[0].z*inv_group_mass<<
"\n"<<
endi);
252 this->inv_group_mass,
253 this->springConstant,
254 this->transverseSpringConstant,
257 doEnergy || doOutput,
269 this->smdAtomsSOAIndex.size(),
270 this->d_smdAtomsSOAIndex,
284 cudaCheck(cudaStreamSynchronize(stream));
286 Vector f(h_extForce->x, h_extForce->y, h_extForce->z);
289 if(cudaMgr->reducerSMDDevice == deviceIndex)
292 outputStep(timeStep, curCOM, h_extForce);
297 void ComputeSMDCUDA::outputStep(
const int timeStep, double3* curCOM, double3* extForce)
299 Vector p(curCOM->x, curCOM->y, curCOM->z);
300 Vector f(extForce->x, extForce->y, extForce->z);
301 if(timeStep % (100*this->outputFrequency) == 0) {
302 iout <<
"SMDTITLE: TS CURRENT_POSITION FORCE\n" <<
endi;
307 void ComputeSMDCUDA::outputCOM(std::string comment)
311 Vector p(curCOM->x, curCOM->y, curCOM->z);
318 void ComputeSMDCUDA::initPeerCOM(double3** d_peerPoolCOM, cudaStream_t stream){
319 DebugM(3,
"[" << CkMyPe() <<
"]" <<
" initPeerCOM\n" <<
endi);
320 initPeerCOMmgpu(numDevices, deviceIndex, d_peerPoolCOM, d_peerCOM, stream);
324 void ComputeSMDCUDA::dump(std::string comment,
327 const double* d_pos_x,
328 const double* d_pos_y,
329 const double* d_pos_z,
335 DebugM(3,
"[" << CkMyPe() <<
"]" <<
" dump " << comment<<
" "<< smdAtomsSOAIndex.size() <<
"\n" <<
endi);
345 allocate_host<double>(&pos_x, numAtoms);
346 allocate_host<double>(&pos_y, numAtoms);
347 allocate_host<double>(&pos_z, numAtoms);
348 allocate_host<float>(&mass, numAtoms);
349 copy_DtoH_sync<double>(d_pos_x, pos_x, numAtoms);
350 copy_DtoH_sync<double>(d_pos_y, pos_y, numAtoms);
351 copy_DtoH_sync<double>(d_pos_z, pos_z, numAtoms);
352 copy_DtoH_sync<float>(d_mass, mass, numAtoms);
354 std::string name(
"smd-");
355 name+=std::to_string(CkMyPe());
357 ofs.open(name, std::ofstream::out | std::ofstream::app);
358 for(
int i=0; i<smdAtomsSOAIndex.size(); i++)
360 int soaid=smdAtomsSOAIndex[i];
361 ofs <<step<<
"- "<< smdAtomsSOAtoGlobalLocalMap[soaid]<<
":"<<pos_x[soaid] <<
","<<pos_y[soaid]<<
","<<pos_z[soaid]<<
","<<mass[soaid]<<
"\n";
367 #endif // NODEGROUP_FORCE_REGISTER
std::ostream & endi(std::ostream &s)
Molecule stores the structural information for the system.
std::atomic< int > reducerSMDDevice
__thread DeviceCUDA * deviceCUDA
void NAMD_bug(const char *err_msg)
static ComputeCUDAMgr * getComputeCUDAMgr()
void NAMD_die(const char *err_msg)
Real atommass(int anum) const