9 #define MIN_DEBUG_LEVEL 3    13 #ifdef NODEGROUP_FORCE_REGISTER    14 GroupRestraintsCUDA::GroupRestraintsCUDA(
const GroupRestraintParam *param, 
bool _mGpuOn, 
int _numDevices, 
int _deviceIndex, 
int grp) {
    18     numDevices = _numDevices;
    19     deviceIndex = _deviceIndex;
    21     splitOverDevicesGrp1=
false;
    22     splitOverDevicesGrp2=
false;
    25     inv_group1_mass = 0.0;
    26     inv_group2_mass = 0.0;
    29     numRestrainedGroup1 = group1Index.size();
    30     numRestrainedGroup2 = group2Index.size();
    31     totalNumRestrained = numRestrainedGroup1 + numRestrainedGroup2; 
    34     calcGroup1COM = (numRestrainedGroup1 ? true : 
false);
    37     groupAtomsSOAIndex.resize(totalNumRestrained);
    45     allocate_host<double>(&h_resEnergy, 1);
    46     allocate_host<double3>(&h_diffCOM, 1);
    47     allocate_host<double3>(&h_group1COM, 1);
    48     allocate_host<double3>(&h_group2COM, 1);
    50     allocate_host<double3>(&h_resForce, 1);
    51     allocate_device<double3>(&d_group1COM, 1);
    52     allocate_device<double3>(&d_group2COM, 1);
    53     allocate_device<int>(&d_groupAtomsSOAIndex, totalNumRestrained);
    54     allocate_device<unsigned int>(&d_tbcatomic, 1);
    56     cudaCheck(cudaMemset(d_tbcatomic, 0, 
sizeof(
unsigned int)));
    59       allocate_device<double3>(&d_peer1COM, 
sizeof(double3));
    60       allocate_device<double3>(&d_peer2COM, 
sizeof(double3));
    62       allocate_host<double3>(&h_peer1COM, 
sizeof(double3));
    63       allocate_host<double3>(&h_peer2COM, 
sizeof(double3)*numDevices);
    70     double total_mass = 0.0;
    72     for (
int i = 0; i < numRestrainedGroup2; ++i) {
    73         int index = group2Index[i]; 
    74         if (index > -1 && index < totalAtoms) {
    78             sprintf(err_msg, 
"Group restraints: Bad atom index for %s!"    79                 " Atom indices must be within [%d, %d].\n", groupName, 0, totalAtoms - 1);
    83     inv_group2_mass = 1.0 / total_mass; 
    89         for (
int i = 0; i < numRestrainedGroup1; ++i) {
    90             int index = group1Index[i]; 
    91             if (index > -1 && index < totalAtoms) {
    95                 sprintf(err_msg, 
"Group restraints: Bad atom index for %s!"    96                     " Atom indices must be within [%d, %d].\n", groupName, 0, totalAtoms - 1);
   100         inv_group1_mass = 1.0 / total_mass; 
   106         h_group1COM->
x = ref.
x;
   107         h_group1COM->y = ref.
y;
   108         h_group1COM->z = ref.
z;
   112 GroupRestraintsCUDA::~GroupRestraintsCUDA() {
   113     deallocate_host<double>(&h_resEnergy);
   114     deallocate_host<double3>(&h_resForce);
   115     deallocate_host<double3>(&h_diffCOM);
   116     deallocate_host<double3>(&h_group1COM);
   117     deallocate_host<double3>(&h_group2COM);
   118     deallocate_device<double3>(&d_group1COM); 
   119     deallocate_device<double3>(&d_group2COM); 
   120     deallocate_device<int>(&d_groupAtomsSOAIndex);
   121     deallocate_device<unsigned int>(&d_tbcatomic);
   125 void GroupRestraintsCUDA::updateAtoms(
   126       std::vector<AtomMap*> &atomMapsList,
   127       std::vector<CudaLocalRecord> &localRecords,
   128       const int *h_globalToLocalID) {
   132     numRestrainedGroup1Local = numRestrainedGroup1;
   133     numRestrainedGroup2Local = numRestrainedGroup2;
   135       groupAtomsSOAIndex.clear();
   137         const std::vector<int> &group1Index = resParam->GetGroup1AtomIndex();
   138         DebugM(3, 
"[" << CkMyPe() << 
"]" <<
" updateAtoms grp1 "<< group1Index.size() << 
"\n" << 
endi);
   139         if(numRestrainedGroup1 != group1Index.size()) {
   141             sprintf(
"Number of atoms in group 1 restraint for '%s' is changed!", groupName);
   145         for(
int i = 0 ; i < numRestrainedGroup1; ++i){
   146             int gid = group1Index[i];
   148             DebugM(2, 
"[" << CkMyPe() << 
"]" <<
" updateAtoms grp1 looking for "<<  gid << 
"\n" << 
endi);
   150             for(
int j = 0 ; j < atomMapsList.size(); ++j){
   151                 lid = atomMapsList[j]->localID(gid);
   160                 NAMD_bug(
"LocalAtomID not found in patchMap");
   162                 splitOverDevicesGrp1=
true;
   167                 int soaPid = h_globalToLocalID[lid.
pid];
   170                     int soaIndex = localRecords[soaPid].bufferOffset + lid.
index;
   171                     DebugM(2, 
"[" << CkMyPe() << 
"]" <<
" updateAtoms grp1 found gid "<< gid << 
" as soa "<<  soaIndex << 
"\n" << 
endi);
   173                       groupAtomsSOAIndex.push_back(soaIndex);
   175                       groupAtomsSOAIndex[i] = soaIndex;
   179         DebugM(3, 
"[" << CkMyPe() << 
"]" <<
" updateAtoms grp1 soa "<<  groupAtomsSOAIndex.size() << 
"\n" << 
endi);
   183             numRestrainedGroup1Local = groupAtomsSOAIndex.size();
   185         if(numRestrainedGroup1Local>0)
   186           std::sort(groupAtomsSOAIndex.begin(), groupAtomsSOAIndex.begin()+numRestrainedGroup1Local);
   191     const std::vector<int> &group2Index = resParam->GetGroup2AtomIndex();
   192     if(numRestrainedGroup2 != group2Index.size()) {
   194         sprintf(
"Number of atoms in group 2 restraint for '%s' is changed!", groupName);
   197     DebugM(4, 
"[" << CkMyPe() << 
"]" <<
" updateAtoms grp2 "<< group2Index.size() << 
"\n" << 
endi);
   199     for(
int i = 0 ; i < numRestrainedGroup2; ++i){
   200         int gid = group2Index[i];
   202         DebugM(2, 
"[" << CkMyPe() << 
"]" <<
" updateAtoms grp2 looking for "<<  gid << 
"\n" << 
endi);
   204         for(
int j = 0 ; j < atomMapsList.size(); ++j){
   205             lid = atomMapsList[j]->localID(gid);
   214             NAMD_bug(
"LocalAtomID not found in patchMap");
   216             splitOverDevicesGrp2=
true;
   222             int soaPid = h_globalToLocalID[lid.
pid];
   225                 int soaIndex = localRecords[soaPid].bufferOffset + lid.
index;
   226                 DebugM(2, 
"[" << CkMyPe() << 
"]" <<
" updateAtoms grp2 found "<<  gid << 
" as SOA "<< soaIndex << 
"\n" << 
endi);
   229                   groupAtomsSOAIndex.push_back(soaIndex);
   231                   groupAtomsSOAIndex[i + numRestrainedGroup1] = soaIndex;
   232                 DebugM(2, 
"[" << CkMyPe() << 
"]" <<
" updateAtoms grp2 SOA soaidx "<< soaIndex<< 
" size now "<< groupAtomsSOAIndex.size() <<
"\n" << 
endi);
   236     numRestrainedGroup2Local = groupAtomsSOAIndex.size() - numRestrainedGroup1Local;
   237     DebugM(3, 
"[" << CkMyPe() << 
"]" <<
" updateAtoms grp2 SOA now "<< groupAtomsSOAIndex.size() << 
" numRestrainedGroup1Local " << numRestrainedGroup1Local << 
" numRestrainedGroup2Local " << numRestrainedGroup2Local <<
"\n" << 
endi);
   239     if(groupAtomsSOAIndex.size()>0){
   241       if(numRestrainedGroup2Local>0)
   242         std::sort(groupAtomsSOAIndex.begin() + numRestrainedGroup1Local, groupAtomsSOAIndex.end());
   244       copy_HtoD<int>(groupAtomsSOAIndex.data(), d_groupAtomsSOAIndex, groupAtomsSOAIndex.size());
   247 void GroupRestraintsCUDA::initPeerCOM(double3** d_peerCOM1G,
   248                                  double3** d_peerCOM2G, cudaStream_t stream){
   249   DebugM(3, 
"[" << CkMyPe() << 
"]" << 
" initPeerCOM\n" << 
endi);
   250   initPeerCOMmgpuG(numDevices, deviceIndex, d_peerCOM1G, d_peer1COM, stream);
   251   initPeerCOMmgpuG(numDevices, deviceIndex, d_peerCOM2G, d_peer2COM, stream);
   254 void GroupRestraintsCUDA::doCOM_mgpu(
   256         const char3*  d_transform,
   258         const double* d_pos_x,
   259         const double* d_pos_y,
   260         const double* d_pos_z,
   262         cudaStream_t  stream) {
   265   DebugM(3, 
"[" << CkMyPe() << 
"]" << 
" doCOM_mgpu g1count "<< numRestrainedGroup1Local<< 
" g2count "<<numRestrainedGroup2Local  << 
endi);
   266   if(numRestrainedGroup1Local>0 && calcGroup1COM)
   269         computeCOMMgpu(numRestrainedGroup1Local,lat,
   270                     d_mass, d_pos_x, d_pos_y, d_pos_z,
   271                        d_transform, this->d_groupAtomsSOAIndex,
   275                        numDevices, deviceIndex,
   278         cudaCheck(cudaStreamSynchronize(stream));
   279         copy_DtoH_sync<double3>(d_peer1COM, h_peer1COM, 1);
   280         DebugM(3, 
"gid " << groupRestID <<
" deviceIndex "<< deviceIndex << 
" g1 COM " <<h_peer1COM->x*inv_group1_mass<<
", "   281                <<h_peer1COM->y*inv_group1_mass<<
", "   282                <<h_peer2COM->z*inv_group1_mass<<
"\n"<<
endi);
   285   if(numRestrainedGroup2Local>0)
   288         computeCOMMgpu(numRestrainedGroup2Local,lat,
   289                     d_mass, d_pos_x, d_pos_y, d_pos_z,
   291                        this->d_groupAtomsSOAIndex+numRestrainedGroup1Local,
   294                        this->d_tbcatomic,  numDevices, deviceIndex,
   297         cudaCheck(cudaStreamSynchronize(stream));
   298         copy_DtoH_sync<double3>(d_peer2COM, h_peer2COM, 1);
   299         DebugM(3, 
" gid "<< groupRestID << 
" deviceIndex "<< deviceIndex << 
" g2 COM " <<h_peer2COM[0].x*inv_group2_mass<<
", "   300                <<h_peer2COM[0].y*inv_group2_mass<<
", "   301                <<h_peer2COM[0].z*inv_group2_mass<<
"\n"<<
endi);
   307 void GroupRestraintsCUDA::doForce(
   314     const char3*  d_transform, 
   316     const double* d_pos_x,
   317     const double* d_pos_y,
   318     const double* d_pos_z,
   319     double*       d_f_normal_x,
   320     double*       d_f_normal_y,
   321     double*       d_f_normal_z,
   326     double3**      d_peer1COM,
   327     double3**      d_peer2COM,
   328     cudaStream_t  stream) {
   329   DebugM(3, 
"[" << CkMyPe() << 
"]" << 
" doForce g1 "<< numRestrainedGroup1Local <<
" g2 "<< numRestrainedGroup2Local <<
"\n" << 
endi);
   334         computeGroupRestraint_2Group(
   339         numRestrainedGroup1Local,
   340         numRestrainedGroup1Local + numRestrainedGroup2Local,
   345         this->inv_group1_mass,
   346         this->inv_group2_mass,
   347         this->d_groupAtomsSOAIndex,
   372       if(numRestrainedGroup2Local>0)
   374           computeGroupRestraint_1Group(
   379                                          numRestrainedGroup2Local,
   384                                          this->inv_group2_mass,
   385                                          this->d_groupAtomsSOAIndex,
   410         cudaCheck(cudaStreamSynchronize(stream));
   412         h_extEnergy[0] += h_resEnergy[0];
   415         if (!calcGroup1COM) {
   416             h_extForce->x += h_resForce->x;
   417             h_extForce->y += h_resForce->y;
   418             h_extForce->z += h_resForce->z;
   422         sprintf(msg,
"GRES: %9d %14s %14.4f %14.4f %14.4f %19.4f %14.4f %14.4f %14.4f\n",
   423             timeStep, groupName, h_diffCOM->x, h_diffCOM->y, h_diffCOM->z,
   424             h_resForce->x, h_resForce->y, h_resForce->z, h_resEnergy[0]);
   442 ComputeGroupRestraintsCUDA::ComputeGroupRestraintsCUDA(
const int outputFreq,
   444     gResOutputFreq = outputFreq;
   446     numDevices = _numDevices;
   447     deviceIndex = _deviceIndex;
   448     const std::map<std::string, GroupRestraintParam*> & groupMap = resList.
GetGroupResMap();
   450     for (
auto it = groupMap.begin(); it != groupMap.end(); ++it) {
   451       GroupRestraintsCUDA * gResCUDA = 
new GroupRestraintsCUDA(it->second, mGpuOn, numDevices, deviceIndex, restraintsCUDAList.size());
   452         restraintsCUDAList.push_back(gResCUDA);
   456 ComputeGroupRestraintsCUDA::~ComputeGroupRestraintsCUDA() {
   457     int numGroup = restraintsCUDAList.size(); 
   458     for (
int i = 0; i < numGroup; ++i) {
   459         delete restraintsCUDAList[i]; 
   461     restraintsCUDAList.clear(); 
   465 void ComputeGroupRestraintsCUDA::updateAtoms(
   466         std::vector<AtomMap*> &atomMapsList,
   467         std::vector<CudaLocalRecord> &localRecords,
   468         const int *h_globalToLocalID) {
   470     int numGroup = restraintsCUDAList.size();
   472     for (
int i = 0; i < numGroup; ++i) {
   473         restraintsCUDAList[i]->updateAtoms(atomMapsList, localRecords, h_globalToLocalID);
   474         if(mGpuOn && restraintsCUDAList[i]->numRestrainedGroup1Local + restraintsCUDAList[i]->numRestrainedGroup2Local >0)
   482 void ComputeGroupRestraintsCUDA::initPeerCOM(cudaStream_t stream){
   483   DebugM(3, 
"[" << CkMyPe() << 
"]" << 
" initPeerCOM\n" << 
endi);
   484     int numGroup = restraintsCUDAList.size();
   486     for (
int i = 0; i < numGroup; ++i) {
   494 void ComputeGroupRestraintsCUDA::doCOM_mgpu(
   496         const char3*  d_transform,
   498         const double* d_pos_x,
   499         const double* d_pos_y,
   500         const double* d_pos_z,
   501         cudaStream_t  stream) {
   502   int numGroup = restraintsCUDAList.size();
   503   DebugM(3, 
"[" << CkMyPe() << 
"]" << 
" doCOM_mgpu "<< numGroup <<
"\n" << 
endi);
   504   for (
int gIdx = 0; gIdx < numGroup; ++gIdx) {
   506     restraintsCUDAList[gIdx]->doCOM_mgpu(
   508                                          d_mass, d_pos_x, d_pos_y, d_pos_z, gIdx,
   514 void ComputeGroupRestraintsCUDA::doForce(
   519         const char3*  d_transform, 
   521         const double* d_pos_x,
   522         const double* d_pos_y,
   523         const double* d_pos_z,
   524         double*       d_f_normal_x,
   525         double*       d_f_normal_y,
   526         double*       d_f_normal_z,
   531         cudaStream_t  stream) {
   533     const int doOutput = (timeStep % gResOutputFreq) == 0;
   536     int doVirCalc = (doOutput ? 1 : doVirial);
   537     int numGroup = restraintsCUDAList.size();
   541     h_extEnergy[0]  = 0.0;
   545     h_extVirial->
xx = 0.0;
   546     h_extVirial->
xy = 0.0;
   547     h_extVirial->
xz = 0.0;
   548     h_extVirial->
yx = 0.0;
   549     h_extVirial->
yy = 0.0;
   550     h_extVirial->
yz = 0.0;
   551     h_extVirial->
zx = 0.0;
   552     h_extVirial->
zy = 0.0;
   553     h_extVirial->
zz = 0.0;
   556       if(timeStep % (100 * gResOutputFreq) == 0) {
   558         sprintf(msg,
"\nGRES_TITLE: %3s %14s %14s %14s %14s %19s %14s %14s %14s\n",
   559             "TS", 
"GROUP_NAME", 
"DISTANCE.X", 
"DISTANCE.Y", 
"DISTANCE.Z",
   560             "FORCE.X", 
"FORCE.Y", 
"FORCE.Z", 
"ENERGY");
   565     for (
int gIdx = 0; gIdx < numGroup; ++gIdx) { 
   566         restraintsCUDAList[gIdx]->doForce( 
   567             timeStep, doEnergy, doVirCalc, doOutput, gIdx, lat, d_transform,
   568             d_mass, d_pos_x, d_pos_y, d_pos_z,
   569             d_f_normal_x, d_f_normal_y, d_f_normal_z, d_virial,
   570             h_extEnergy, h_extForce, h_extVirial, cudaMgr->
curGrp1COM[gIdx],  cudaMgr->
curGrp2COM[gIdx], stream);
 
std::ostream & endi(std::ostream &s)
 
Molecule stores the structural information for the system. 
 
const std::map< std::string, GroupRestraintParam * > & GetGroupResMap() const
 
Vector GetGroupRes1Position() const
 
std::atomic< int > reducerGroupRestraintDevice
 
Vector GetResDirection() const
 
NAMD_HOST_DEVICE double3 make_double3(float3 a)
 
void NAMD_bug(const char *err_msg)
 
const char * GetGroupName() const
 
static ComputeCUDAMgr * getComputeCUDAMgr()
 
void NAMD_die(const char *err_msg)
 
Real atommass(int anum) const
 
Vector GetResCenter() const
 
const std::vector< int > & GetGroup2AtomIndex() const
 
bool GetUseDistMagnitude() const
 
const std::vector< int > & GetGroup1AtomIndex() const