30 #define PRINT_FORCES 0 62 #define MIN_DEBUG_LEVEL 3 72 #define START_HPM_STEP 1000 73 #define STOP_HPM_STEP 1500 77 #if defined(NAMD_CUDA) || defined(NAMD_HIP) 79 #define __thread __declspec(thread) 88 #define SPECIAL_PATCH_ID 91 97 int ilo=0,
int ihip1=1
99 printf(
"AOS Velocities:\n");
100 for (
int i=ilo; i < ihip1; i++) {
101 printf(
"%d %g %g %g\n", i,
111 int ilo=0,
int ihip1=1
113 printf(
"SOA Velocities:\n");
114 for (
int i=ilo; i < ihip1; i++) {
115 printf(
"%d %g %g %g\n", i, vel_x[i], vel_y[i], vel_z[i]);
121 printf(
"%g %g %g %g %g %g %g %g %g\n",
159 inline int init(
int initstep,
int initperiod,
int delta=0) {
176 pairlistsAreValid(0),
183 #
if defined(NAMD_CUDA) || defined(NAMD_HIP)
185 CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
186 PatchData* patchData = cpdata.ckLocalBranch();
189 #endif // defined(NAMD_CUDA) || defined(NAMD_HIP) 228 #if (defined(NAMD_CUDA) || defined(NAMD_HIP)) && defined(SEQUENCER_SOA) && defined(NODEGROUP_FORCE_REGISTER) 253 #if (defined(NAMD_CUDA) || defined(NAMD_HIP)) && defined(SEQUENCER_SOA) && defined(NODEGROUP_FORCE_REGISTER) 255 delete CUDASequencer;
262 void Sequencer::threadRun(
Sequencer* arg)
272 DebugM(4,
"::run() - this = " <<
this <<
"\n" );
273 thread = CthCreate((CthVoidFn)&(threadRun),(
void*)(
this),
SEQ_STK_SZ);
274 CthSetStrategyDefault(thread);
295 switch ( scriptTask ) {
340 NAMD_die(
"Minimization is currently not supported on the GPU integrator\n");
355 #ifdef NODEGROUP_FORCE_REGISTER 361 #ifdef NODEGROUP_FORCE_REGISTER 370 NAMD_bug(
"Unknown task in Sequencer::algorithm");
385 #if defined(NODEGROUP_FORCE_REGISTER) 389 CUDASequencer->numPatchesCheckedIn += 1;
390 if (CUDASequencer->numPatchesCheckedIn < patchMap->
numPatchesOnNode(CkMyPe())) {
392 CUDASequencer->waitingThreads.push_back(CthSelf());
399 int lastStep = CUDASequencer->patchData->flags.step;
401 if (CUDASequencer->breakSuspends)
break;
407 CUDASequencer->numPatchesCheckedIn += 1;
408 CUDASequencer->waitingThreads.push_back(CthSelf());
409 if(CUDASequencer->numPatchesCheckedIn == patchMap->
numPatchesOnNode(CkMyPe()) - 1 &&
410 CUDASequencer->masterThreadSleeping){
411 CUDASequencer->masterThreadSleeping =
false;
412 CthAwaken(CUDASequencer->masterThread);
419 CUDASequencer->numPatchesCheckedIn = 0;
420 for (CthThread t : CUDASequencer->waitingThreads) {
423 CUDASequencer->waitingThreads.clear();
439 CUDASequencer->sync();
452 ComputeBondedCUDA* cudaBond = cudaMgr->getComputeBondedCUDA();
460 if (isMaster && cudaGlobal && doMigration) cudaGlobal->setStep(static_cast<int64_t>(
patch->
flags.
step));
480 cudaBond->atomUpdate();
487 cudaGlobal->updateAtomMaps();
488 cudaGlobal->communicateToClients(&(this->
patch->
lattice));
496 CUDASequencer->launch_set_compute_positions();
497 CUDASequencer->sync();
505 for(
int i = 0 ; i < CkNumPes(); i++){
508 ComputeBondedCUDA* b = CUDASequencer->patchData->cudaBondedList[i];
510 if (b == NULL)
continue;
511 auto list = std::find(std::begin(b->getBondedPes()), std::end(b->getBondedPes()), CkMyPe());
512 if( list != std::end(b->getBondedPes()) ){
513 b->openBoxesOnPe(startup);
528 cudaBond->loadTuplesOnPe(startup);
534 cudaBond->copyTupleDataGPU(startup);
536 cudaBond->copyTupleDataSN();
546 cudaBond->launchWork();
548 if (cudaPme && computePme) {
549 cudaPme->
compute(*(CUDASequencer->patchData->lat), reducePme, this->patch->flags.step);
554 cudaGlobal->calculate();
570 for(
int i = 0; i < numhp; ++i) {
572 hp->zero_global_forces_SOA();
587 CUDASequencer->copyGlobalForcesToDevice();
594 cudaBond->finishPatches();
602 cudaBond->finishReductions();
603 if (cudaGlobal) cudaGlobal->finishReductions();
612 if (cudaPme && computePme) {
613 cudaPme->
compute(*(CUDASequencer->patchData->lat), reducePme, this->patch->flags.step);
619 cudaGlobal->calculate();
631 for(
int i = 0; i < numhp; ++i) {
633 hp->zero_global_forces_SOA();
648 CUDASequencer->copyGlobalForcesToDevice();
654 cudaBond->finishPatches();
659 if (cudaGlobal) cudaGlobal->finishReductions();
677 const int doMigration,
680 const int maxForceNumber,
685 Controller *c_out = CUDASequencer->patchData->c_out;
686 bool mGpuOn = CUDASequencer->mGpuOn;
692 CUDASequencer->submitReductionValues();
695 CUDASequencer->patchData->reductionBackendSave->setVal(
reduction);
699 c_out->mcPressure_prepare(step);
706 CUDASequencer->monteCarloPressure_part1(factor, origin, oldLattice);
711 CUDASequencer->patchData->lat = &(this->
patch->
lattice);
712 CUDASequencer->patchData->factor = &(factor);
714 CUDASequencer->patchData->flags.copyIntFlags(this->
patch->
flags);
725 CUDASequencer->update_patch_flags();
736 CUDASequencer->monteCarloPressure_part2(step, maxForceNumber,
737 doEnergy, doGlobal, doVirial);
738 CUDASequencer->submitReductionValues();
742 c_out->mcPressure_accept(step);
748 CUDASequencer->monteCarloPressure_accept(doMigration);
753 CUDASequencer->patchData->lat = &(this->
patch->
lattice);
755 CUDASequencer->patchData->flags.copyIntFlags(this->
patch->
flags);
759 CUDASequencer->monteCarloPressure_reject(this->
patch->
lattice);
762 reduction->
setVal(CUDASequencer->patchData->reductionBackendSave);
768 if(isMasterPe && !accepted){
770 CUDASequencer->update_patch_flags();
775 const int updatePatchMap) {
778 const bool updatePatchData = startup || doGlobal || updatePatchMap;
781 bool realloc =
false;
790 if(CUDASequencer->patchData->atomReallocationFlagPerDevice[i] != 0) {
797 CUDASequencer->reallocateMigrationDestination();
798 CUDASequencer->registerSOAPointersToHost();
802 CUDASequencer->copySOAHostRegisterToDevice();
811 CUDASequencer->migrationLocalInit();
817 CUDASequencer->migrationPerform();
823 CUDASequencer->migrationUpdateAtomCounts();
829 CUDASequencer->migrationUpdateAtomOffsets();
835 CUDASequencer->copyPatchDataToHost();
843 realloc = CUDASequencer->copyPatchData(
true,
false);
850 if(CUDASequencer->patchData->atomReallocationFlagPerDevice[i] != 0) {
857 CUDASequencer->registerSOAPointersToHost();
861 CUDASequencer->copySOAHostRegisterToDevice();
867 CUDASequencer->migrationLocalPost(0);
868 CUDASequencer->migrationSortAtomsNonbonded();
873 if (!updatePatchData) {
876 if(CUDASequencer->numPatchesCheckedIn < patchMap->
numPatchesOnNode(CkMyPe()) -1 ) {
877 CUDASequencer->masterThreadSleeping =
true;
878 CUDASequencer->masterThread = CthSelf();
884 CUDASequencer->sync();
889 CUDASequencer->migrationUpdateDestination();
894 CUDASequencer->migrationUpdateProxyDestination();
899 CUDASequencer->migrationUpdateRemoteOffsets();
904 CUDASequencer->copyDataToPeers(
true);
908 if (updatePatchData) {
912 for(
int i = 0; i < numhp; ++i) {
921 CUDASequencer->copyAoSDataToHost();
926 if(CUDASequencer->numPatchesCheckedIn < patchMap->
numPatchesOnNode(CkMyPe()) -1 ) {
927 CUDASequencer->masterThreadSleeping =
true;
928 CUDASequencer->masterThread = CthSelf();
935 CUDASequencer->updateHostPatchDataSOA();
943 CUDASequencer->migrationUpdateAdvancedFeatures(
false);
951 #ifdef TIMER_COLLECTION 952 TimerSet& t =
patch->timerSet;
969 Controller *c_out = CUDASequencer->patchData->c_out;
994 doNonbonded = (step >= numberOfSteps) ||
1011 doFullElectrostatics = (dofull &&
1012 ((step >= numberOfSteps) ||
1024 doEnergy = energyFrequency.
init(step, newComputeEnergies);
1037 int doGlobal = (doTcl || doColvars || doIMD);
1040 bool globalMasterStep=
false;
1041 int doGlobalObjects=0;
1042 int doGlobalStaleForces = 0;
1047 globalMasterStep = globalMasterFrequency.
check(step);
1048 doGlobalObjects = globalMasterStep? 1:0;
1052 doGlobalStaleForces=1;
1060 doGlobalStaleForces=doGlobalObjects;
1064 doGlobalStaleForces=doGlobalObjects;
1069 doGlobalStaleForces = 0;
1070 doGlobalObjects = 0;
1086 langevinPistonFrequency.
init(step,
1118 patch->copy_atoms_to_SOA();
1134 CUDASequencer->breakSuspends =
false;
1139 if (CUDASequencer->patchData->ptrCollectionMaster == NULL) {
1142 CUDASequencer->patchData->ptrCollectionMaster = pcm;
1145 if (CUDASequencer->patchData->ptrOutput == NULL) {
1148 CUDASequencer->patchData->ptrOutput = pout;
1151 if (CUDASequencer->patchData->pdb == NULL) {
1154 CUDASequencer->patchData->pdb = pdb;
1157 if (CUDASequencer->patchData->imd == NULL) {
1160 CUDASequencer->patchData->imd = imd;
1170 CUDASequencer->patchData->cudaBondedList[CkMyPe()] = NULL;
1171 CUDASequencer->patchData->cudaNonbondedList[CkMyPe()] = NULL;
1201 if(patchData->updateCounter.load()>0)
1203 CUDASequencer->updateDeviceKernels();
1207 CUDASequencer->startRun1(maxForceUsed, this->
patch->
lattice);
1210 CUDASequencer->patchData->lat = &(this->
patch->
lattice);
1211 CUDASequencer->patchData->flags.copyIntFlags(this->
patch->
flags);
1215 const bool addCudaGlobalForces =
1216 (cudaGlobalMasterObject !=
nullptr) ?
1217 cudaGlobalMasterObject->willAddGlobalForces() :
1219 if (addCudaGlobalForces) {
1220 CUDASequencer->allocateGPUSavedForces();
1228 if(CUDASequencer->numPatchesCheckedIn < patchMap->
numPatchesOnNode(CkMyPe()) -1 ) {
1229 CUDASequencer->masterThreadSleeping =
true;
1230 CUDASequencer->masterThread = CthSelf();
1252 CUDASequencer->setRescalePairlistTolerance(step < numberOfSteps);
1263 CUDASequencer->finish_patch_flags(
true);
1266 const bool addCudaGlobalForces =
1267 (cudaGlobalMasterObject !=
nullptr) ?
1268 cudaGlobalMasterObject->willAddGlobalForces() :
1270 CUDASequencer->startRun2(timestep,
1272 doGlobal || addCudaGlobalForces, maxForceUsed);
1276 const bool requestTotalForces = computeGlobal ? computeGlobal->
getForceSendActive() :
false;
1279 const bool requestGPUTotalForces =
1280 (cudaGlobalMasterObject !=
nullptr) ?
1281 cudaGlobalMasterObject->requestedTotalForces() :
1283 CUDASequencer->startRun3(timestep,
1285 requestTotalForces, doGlobalStaleForces,
1287 requestGPUTotalForces,
1298 for(
int i = 0; i < numhp; ++i) {
1304 CUDASequencer->submitReductionValues();
1314 CUDASequencer->patchData->flags.copyIntFlags(this->
patch->
flags);
1316 c_out->printStep(step);
1322 double velrescaling = 1;
1324 for( ++step; step <= numberOfSteps; ++step ){
1325 const int imdStep = imdFrequency.
check(step);
1326 const int isForcesOutputStep = forceDcdFrequency.
check(step) + (doIMD ? imdStep : 0);
1327 int dcdSelectionChecks=0;
1329 for(
int dcdindex=0; dcdindex<16;++dcdindex)
1332 if(dcdSelectionFrequency && step % dcdSelectionFrequency==0)
1333 dcdSelectionChecks++;
1335 const int isCollection = restartFrequency.
check(step) +
1336 dcdFrequency.
check(step) + velDcdFrequency.
check(step) +
1337 imdStep + dcdSelectionChecks;
1338 int isMigration =
false;
1339 const int doVelocityRescale = stochRescaleFrequency.
check(step);
1340 const int doMCPressure = monteCarloPressureFrequency.
check(step);
1342 doEnergy = energyFrequency.
check(step) || doVelocityRescale || doMCPressure;
1343 int langevinPistonStep = langevinPistonFrequency.
check(step);
1345 int reassignVelocityStep = reassignVelocityFrequency.
check(step);
1348 int berendsenPressureStep = 0;
1353 berendsenPressureStep = 1;
1356 if(patchData->updateCounter.load()>0)
1358 CUDASequencer->updateDeviceKernels();
1363 globalMasterStep = globalMasterFrequency.
check(step);
1364 doGlobalObjects = globalMasterStep? 1:0;
1368 doGlobalStaleForces=1;
1376 doGlobalStaleForces=doGlobalObjects;
1380 doGlobalStaleForces=doGlobalObjects;
1385 doGlobalStaleForces = 0;
1386 doGlobalObjects = 0;
1387 globalMasterStep =
false;
1391 #if defined(NAMD_NVTX_ENABLED) || defined(NAMD_CMK_TRACE_ENABLED) || defined(NAMD_ROCTX_ENABLED) 1395 int beginStep =
simParams->beginEventStep;
1397 bool controlProfiling = eon && epid;
1398 if (controlProfiling && step == beginStep) {
1401 if (controlProfiling && step == endStep) {
1410 c_out->piston1(step);
1414 c_out->berendsenPressureController(step);
1419 if (isMasterPe)
cudaCheck(cudaDeviceSynchronize());
1424 if (langevinPistonStep || berendsenPressureStep) {
1429 CUDASequencer->patchData->lat = &(this->
patch->
lattice);
1430 CUDASequencer->patchData->factor = &(factor);
1436 int previousMaxForceUsed;
1439 previousMaxForceUsed = maxForceUsed;
1443 doNonbonded = nonbondedFrequency.
check(step);
1445 doFullElectrostatics = (dofull && fullElectFrequency.
check(step));
1461 CUDASequencer->launch_part1(
1463 timestep, nbondstep, slowstep, velrescaling, maxvel2,
1464 *(CUDASequencer->patchData->factor),
1467 (langevinPistonStep || berendsenPressureStep) ? *(CUDASequencer->patchData->lat) : this->patch->lattice,
1468 reassignVelocityStep,
1470 berendsenPressureStep,
1471 previousMaxForceUsed,
1473 this->patch->flags.savePairlists,
1474 this->patch->flags.usePairlists,
1479 if (reassignVelocityStep)
1489 CUDASequencer->launch_part11(
1490 timestep, nbondstep, slowstep, velrescaling, maxvel2,
1491 *(CUDASequencer->patchData->factor),
1494 (langevinPistonStep || berendsenPressureStep) ? *(CUDASequencer->patchData->lat) : this->patch->lattice,
1496 previousMaxForceUsed,
1498 this->patch->flags.savePairlists,
1499 this->patch->flags.usePairlists,
1510 if(CUDASequencer->patchData->migrationFlagPerDevice[i] != 0) {
1519 CUDASequencer->launch_set_compute_positions();
1527 CUDASequencer->updatePairlistFlags(isMigration);
1529 CUDASequencer->copyPositionsAndVelocitiesToHost(isMigration, doGlobalObjects);
1538 cudaGlobal->communicateToClients(&(this->
patch->
lattice));
1549 if(CUDASequencer->numPatchesCheckedIn < patchMap->
numPatchesOnNode(CkMyPe()) -1 ) {
1550 CUDASequencer->masterThreadSleeping =
true;
1551 CUDASequencer->masterThread = CthSelf();
1568 CUDASequencer->finish_patch_flags(isMigration);
1577 const bool addCudaGlobalForces =
1578 (cudaGlobalMasterObject !=
nullptr) ?
1579 cudaGlobalMasterObject->willAddGlobalForces() :
1581 CUDASequencer->launch_part2(doMCPressure,
1582 timestep, nbondstep, slowstep,
1589 doGlobalStaleForces || addCudaGlobalForces,
1601 const bool requestTotalForces = (computeGlobal ? computeGlobal->
getForceSendActive() :
false) && doGlobalObjects;
1606 const bool requestGPUTotalForces =
1607 (CudaGlobalMasterObject !=
nullptr) ?
1608 CudaGlobalMasterObject->requestedTotalForces() :
1610 CUDASequencer->launch_part3(doMCPressure,
1611 timestep, nbondstep, slowstep,
1616 doGlobalStaleForces,
1617 requestGPUTotalForces,
1621 isForcesOutputStep);
1628 if (requestTotalForces) {
1633 for(
int i = 0; i < numhp; ++i) {
1639 CUDASequencer->submitReductionValues();
1649 c_out->printStep(step);
1653 if (doVelocityRescale) {
1660 if (doVelocityRescale) {
1674 CUDASequencer->copyAoSDataToHost();
1681 for (
auto i= hplist->
begin(); i != hplist->
end(); i++) {
1697 CUDASequencer->copyAoSDataToHost();
1698 CUDASequencer->updateHostPatchDataSOA();
1699 CUDASequencer->saveForceCUDASOA_direct(
false,
true, maxForceUsed);
1705 for (
auto i= hplist->
begin(); i != hplist->
end(); i++) {
1707 hp->sort_solvent_atoms();
1708 hp->copy_atoms_to_SOA();
1709 hp->copy_forces_to_AOS();
1713 CUDASequencer->updateHostPatchDataSOA();
1714 CUDASequencer->saveForceCUDASOA_direct(
false,
true, maxForceUsed);
1716 if(isMasterPe) CUDASequencer->copyPositionsAndVelocitiesToHost(
true,doGlobal);
1719 for (
auto i= hplist->
begin(); i != hplist->
end(); i++) {
1721 hp->copy_updates_to_AOS();
1722 hp->copy_forces_to_AOS();
1728 CUDASequencer->breakSuspends =
true;
1742 CUDASequencer->copyPatchData(
true, startup);
1744 CUDASequencer->reallocateMigrationDestination();
1745 CUDASequencer->copyAtomDataToDeviceAoS();
1747 CUDASequencer->copyAtomDataToDevice(startup, maxForceUsed);
1749 CUDASequencer->migrationLocalPost(startup);
1750 CUDASequencer->migrationUpdateAdvancedFeatures(startup);
1752 CUDASequencer->registerSOAPointersToHost();
1756 CUDASequencer->copySOAHostRegisterToDevice();
1762 CUDASequencer->updateHostPatchDataSOA();
1776 ComputeBondedCUDA* cudaBond = cudaMgr->getComputeBondedCUDA();
1779 CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
1780 patchData = cpdata.ckLocalBranch();
1785 if (patchData->devicePatchMapFlag[CkMyPe()])
return;
1786 patchData->devicePatchMapFlag[CkMyPe()] = 1;
1797 std::vector<NBPatchRecord>& nonbondedPatches = cudaNbond->
getPatches();
1798 std::vector<HomePatch*>& homePatches = patchData->devData[deviceIndex].patches;
1799 std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;
1805 homePatches.begin(),
1813 for (
int i = 0; i < nonbondedPatches.size(); i++) {
1815 record.
patchID = nonbondedPatches[i].patchID;
1818 const int targetPatchID = record.
patchID;
1819 auto result = std::find_if(
1820 homePatches.begin(),
1823 return (p->getPatchID() == targetPatchID);
1826 record.
isProxy = (result == homePatches.end());
1827 localPatches.push_back(record);
1834 localPatches.begin(),
1837 return (a.
isProxy < b.isProxy);
1842 cudaBond->updatePatchOrder(localPatches);
1844 patchData->devData[deviceIndex].numPatchesHome = homePatches.size();
1845 patchData->devData[deviceIndex].numPatchesHomeAndProxy = localPatches.size();
1857 std::vector<CudaPeerRecord>& myPeerPatches = patchData->devData[deviceIndex].h_peerPatches;
1858 std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;
1860 for (
int i = 0; i < localPatches.size(); i++) {
1861 std::vector<CudaPeerRecord> tempPeers;
1862 const int targetPatchID = localPatches[i].patchID;
1863 const int targetIsProxy = localPatches[i].isProxy;
1866 if (devIdx == deviceIndex)
continue;
1867 std::vector<CudaLocalRecord>& peerPatches = patchData->devData[devIdx].h_localPatches;
1871 for (
int j = 0; j < patchData->devData[devIdx].numPatchesHomeAndProxy; j++) {
1873 if (peer.
patchID == targetPatchID && peer.
isProxy != targetIsProxy) {
1877 tempPeers.push_back(peerRecord);
1885 localPatches[i].numPeerRecord = tempPeers.size();
1886 if (!tempPeers.empty()) {
1887 localPatches[i].peerRecordStartIndex = myPeerPatches.size();
1888 myPeerPatches.insert(myPeerPatches.end(), tempPeers.begin(), tempPeers.end());
1896 CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
1897 patchData = cpdata.ckLocalBranch();
1903 const int numPatchesHome = patchData->devData[deviceIndex].numPatchesHome;
1904 std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;
1907 CkPrintf(
"PE: %d\n", CkMyPe());
1909 CkPrintf(
"[%d] Home patches %d Local patches %d\n", CkMyPe(), numPatchesHome, localPatches.size());
1911 CkPrintf(
"Home Patches: ");
1912 for (
int i = 0; i < numPatchesHome; i++) {
1913 CkPrintf(
"%d ", localPatches[i].patchID);
1917 CkPrintf(
"Proxy Patches: ");
1918 for (
int i = numPatchesHome; i < localPatches.size(); i++) {
1919 CkPrintf(
"%d ", localPatches[i].patchID);
1929 CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
1930 patchData = cpdata.ckLocalBranch();
1935 if (!patchData->devicePatchMapFlag[CkMyPe()])
return;
1936 patchData->devicePatchMapFlag[CkMyPe()] = 0;
1943 std::vector<HomePatch*>& homePatches = patchData->devData[deviceIndex].patches;
1944 std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;
1945 std::vector<CudaPeerRecord>& peerPatches = patchData->devData[deviceIndex].h_peerPatches;
1947 homePatches.clear();
1948 localPatches.clear();
1949 peerPatches.clear();
1962 CProxy_PatchData cpdata(CkpvAccess(BOCclass_group).patchData);
1963 patchData = cpdata.ckLocalBranch();
1969 const int numPatchesHome = patchData->devData[deviceIndex].numPatchesHome;
1970 std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;
1975 int max_atom_count = 0;
1976 int total_atom_count = 0;
1979 for (
int i = 0; i < numPatchesHome; i++) {
1984 if (
patch != NULL)
break;
1986 if (
patch == NULL)
NAMD_die(
"Sequencer: Failed to find patch in updateDevicePatchMap");
1991 if (localPatches[i].numAtoms > max_atom_count) max_atom_count = localPatches[i].numAtoms;
1992 total_atom_count += localPatches[i].numAtoms;
2000 const int numPatchesHome = patchData->devData[deviceIndex].numPatchesHome;
2001 const int numPatchesHomeAndProxy = patchData->devData[deviceIndex].numPatchesHomeAndProxy;
2002 std::vector<CudaPeerRecord>& peerPatches = patchData->devData[deviceIndex].h_peerPatches;
2003 std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;
2005 for (
int i = numPatchesHome; i < numPatchesHomeAndProxy; i++) {
2006 const int index = localPatches[i].peerRecordStartIndex;
2007 const int devIdx = peerPatches[index].deviceIndex;
2008 const int peerIdx = peerPatches[index].patchIndex;
2009 const CudaLocalRecord peer = patchData->devData[devIdx].h_localPatches[peerIdx];
2011 localPatches[i].numAtoms = peer.
numAtoms;
2020 const int numPatchesHomeAndProxy = patchData->devData[deviceIndex].numPatchesHomeAndProxy;
2021 std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;
2023 int runningOffset = 0;
2024 int runningOffsetNBPad = 0;
2026 for (
int i = 0; i < numPatchesHomeAndProxy; i++) {
2027 localPatches[i].bufferOffset = runningOffset;
2028 localPatches[i].bufferOffsetNBPad = runningOffsetNBPad;
2029 runningOffset += localPatches[i].numAtoms;
2030 runningOffsetNBPad += localPatches[i].numAtomsNBPad;
2038 const int numPatchesHomeAndProxy = patchData->devData[deviceIndex].numPatchesHomeAndProxy;
2039 std::vector<CudaLocalRecord>& localPatches = patchData->devData[deviceIndex].h_localPatches;
2040 std::vector<CudaPeerRecord>& peerPatches = patchData->devData[deviceIndex].h_peerPatches;
2043 for (
int i = 0; i < peerPatches.size(); i++) {
2044 const int devIdx = peerPatches[i].deviceIndex;
2045 const int peerIdx = peerPatches[i].patchIndex;
2046 const CudaLocalRecord peer = patchData->devData[devIdx].h_localPatches[peerIdx];
2053 for (
int i = 0; i < numPatchesHomeAndProxy; i++) {
2054 const int numPeerRecord = localPatches[i].numPeerRecord;
2055 const int peerOffset = localPatches[i].peerRecordStartIndex;
2058 localPatches[i].inline_peers[j] = peerPatches[peerOffset+j];
2075 #ifdef TIMER_COLLECTION 2076 TimerSet& t =
patch->timerSet;
2115 doNonbonded = (step >= numberOfSteps) ||
2132 doFullElectrostatics = (dofull &&
2133 ((step >= numberOfSteps) ||
2145 doEnergy = energyFrequency.
init(step, newComputeEnergies);
2150 int doGlobal = doTcl || doColvars;
2168 langevinPistonFrequency.
init(step,
2335 #if defined(NAMD_NVTX_ENABLED) || defined(NAMD_CMK_TRACE_ENABLED) || defined(NAMD_ROCTX_ENABLED) 2339 int beginStep =
simParams->beginEventStep;
2344 for ( ++step; step <= numberOfSteps; ++step ) {
2345 int dcdSelectionChecks=0;
2347 for(
int dcdindex=0; dcdindex<16;++dcdindex)
2350 if(dcdSelectionFrequency && step % dcdSelectionFrequency==0)
2351 dcdSelectionChecks++;
2353 const int isCollection = restartFrequency.
check(step) +
2354 dcdFrequency.
check(step) + velDcdFrequency.
check(step) +
2355 forceDcdFrequency.
check(step) + imdFrequency.
check(step) +
2357 const int isMigration = stepsPerCycle.
check(step);
2358 doEnergy = energyFrequency.
check(step);
2359 DebugM(3,
"doGlobal now "<< doGlobal<<
"\n"<<
endi);
2361 #if defined(NAMD_NVTX_ENABLED) || defined(NAMD_CMK_TRACE_ENABLED) || defined(NAMD_ROCTX_ENABLED) 2362 eon = epid && (beginStep < step && step <= endStep);
2364 if (controlProfiling && step == beginStep) {
2367 if (controlProfiling && step == endStep) {
2434 if ( langevinPistonFrequency.
check(step) ) {
2521 doNonbonded = nonbondedFrequency.
check(step);
2523 doFullElectrostatics = (dofull && fullElectFrequency.
check(step));
2679 fprintf(stderr,
"Patch %d has %d atoms\n", pid, n);
2680 fprintf(stderr,
"%3s %8s %12s %12s %12s\n",
2681 "",
"id",
"fnormal_x",
"fnbond_x",
"fslow_x");
2682 for (
int i=0; i < n; i++) {
2683 int index = p.
id[i];
2684 fprintf(stderr,
"%3d %8d %12.8f %12.8f %12.8f\n",
2691 for (
int i=0; i < n; i++) {
2702 TestArray_write<double>(
2703 "f_normal_good.bin",
"f_normal good", (
double*)f_normal, 3*n);
2704 TestArray_write<double>(
2705 "f_nbond_good.bin",
"f_nbond good", (
double*)f_nbond, 3*n);
2706 TestArray_write<double>(
2707 "f_slow_good.bin",
"f_slow good", (
double*)f_slow, 3*n);
2729 patch->copy_updates_to_AOS();
2733 printf(
"Timer collection reporting in microseconds for " 2744 const double scaling,
2749 const double * __restrict recipMass,
2750 const double * __restrict f_normal_x,
2751 const double * __restrict f_normal_y,
2752 const double * __restrict f_normal_z,
2753 const double * __restrict f_nbond_x,
2754 const double * __restrict f_nbond_y,
2755 const double * __restrict f_nbond_z,
2756 const double * __restrict f_slow_x,
2757 const double * __restrict f_slow_y,
2758 const double * __restrict f_slow_z,
2759 double * __restrict vel_x,
2760 double * __restrict vel_y,
2761 double * __restrict vel_z,
2767 NamdProfileEvent::ADD_FORCE_TO_MOMENTUM_SOA);
2769 #ifdef SOA_SIMPLIFY_PARAMS 2770 const double * __restrict recipMass =
patch->patchDataSOA.
recipMass;
2772 const double * __restrict f_normal_x =
patch->patchDataSOA.
f_normal_x;
2773 const double * __restrict f_normal_y =
patch->patchDataSOA.
f_normal_y;
2774 const double * __restrict f_normal_z =
patch->patchDataSOA.
f_normal_z;
2776 const double * __restrict f_nbond_x =
patch->patchDataSOA.
f_nbond_x;
2777 const double * __restrict f_nbond_y =
patch->patchDataSOA.
f_nbond_y;
2778 const double * __restrict f_nbond_z =
patch->patchDataSOA.
f_nbond_z;
2780 const double * __restrict f_slow_x =
patch->patchDataSOA.
f_slow_x;
2781 const double * __restrict f_slow_y =
patch->patchDataSOA.
f_slow_y;
2782 const double * __restrict f_slow_z =
patch->patchDataSOA.
f_slow_z;
2783 double * __restrict vel_x =
patch->patchDataSOA.
vel_x;
2784 double * __restrict vel_y =
patch->patchDataSOA.
vel_y;
2785 double * __restrict vel_z =
patch->patchDataSOA.
vel_z;
2800 if(this->patch->getPatchID() == 538){
2808 fprintf(stderr,
"Old Velocities %lf %lf %lf\n", vel_x[0], vel_y[0], vel_z[ 0]);
2809 fprintf(stderr,
"Adding forces %lf %lf %lf %lf %lf %lf %lf %lf %lf\n",
2810 f_slow_x[43], f_slow_y[43], f_slow_z[43],
2811 f_nbond_x[43], f_nbond_y[43], f_nbond_z[43],
2812 f_normal_x[43], f_normal_y[43], f_normal_z[43]);
// NOTE(review): this excerpt is elided and has original source line numbers
// fused into the text; the switch's `case` labels (and the dt_slow scaling
// line) are not visible here. The three loops below each apply one force
// class to the velocities, vel += f * recipMass * dt, for the slow,
// nonbonded, and normal force arrays in turn -- presumably a fall-through
// switch on maxForceNumber; TODO confirm against the full source.
2815 switch (maxForceNumber) {
// f_slow contribution, integrated with dt_slow.
2818 for (
int i=0; i < numAtoms; i++) {
2819 vel_x[i] += f_slow_x[i] * recipMass[i] * dt_slow;
2820 vel_y[i] += f_slow_y[i] * recipMass[i] * dt_slow;
2821 vel_z[i] += f_slow_z[i] * recipMass[i] * dt_slow;
// f_nbond contribution; dt_nbond is pre-multiplied by `scaling`.
2825 dt_nbond *= scaling;
2826 for (
int i=0; i < numAtoms; i++) {
2827 vel_x[i] += f_nbond_x[i] * recipMass[i] * dt_nbond;
2828 vel_y[i] += f_nbond_y[i] * recipMass[i] * dt_nbond;
2829 vel_z[i] += f_nbond_z[i] * recipMass[i] * dt_nbond;
// f_normal contribution; dt_normal is pre-multiplied by `scaling` as well.
2833 dt_normal *= scaling;
2834 for (
int i=0; i < numAtoms; i++) {
2835 vel_x[i] += f_normal_x[i] * recipMass[i] * dt_normal;
2836 vel_y[i] += f_normal_y[i] * recipMass[i] * dt_normal;
2837 vel_z[i] += f_normal_z[i] * recipMass[i] * dt_normal;
2850 const double * __restrict vel_x,
2851 const double * __restrict vel_y,
2852 const double * __restrict vel_z,
2853 double * __restrict pos_x,
2854 double * __restrict pos_y,
2855 double * __restrict pos_z,
2860 NamdProfileEvent::ADD_VELOCITY_TO_POSITION_SOA);
2861 #ifdef SOA_SIMPLIFY_PARAMS 2862 const double * __restrict vel_x =
patch->patchDataSOA.
vel_x;
2863 const double * __restrict vel_y =
patch->patchDataSOA.
vel_y;
2864 const double * __restrict vel_z =
patch->patchDataSOA.
vel_z;
2865 double * __restrict pos_x =
patch->patchDataSOA.
pos_x;
2866 double * __restrict pos_y =
patch->patchDataSOA.
pos_y;
2867 double * __restrict pos_z =
patch->patchDataSOA.
pos_z;
// Position drift: advance each SOA position component by vel * dt.
2870 for (
int i=0; i < numAtoms; i++) {
2871 pos_x[i] += vel_x[i] * dt; // x drift
2872 pos_y[i] += vel_y[i] * dt; // y drift
2873 pos_z[i] += vel_z[i] * dt; // z drift
// Debug dump hard-coded to patch ID 538 and atom index 43 -- development
// aid only; NOTE(review): consider guarding with a debug macro.
2876 if(this->patch->getPatchID() == 538){
2877 fprintf(stderr,
"New Positions %lf %lf %lf\n", pos_x[43], pos_y[43], pos_z[43]);
2878 fprintf(stderr,
"New Velocities %lf %lf %lf\n", vel_x[43], vel_y[43], vel_z[43]);
2887 const int * __restrict hydrogenGroupSize,
2888 const float * __restrict mass,
2889 const double * __restrict vel_x,
2890 const double * __restrict vel_y,
2891 const double * __restrict vel_z,
2896 NamdProfileEvent::SUBMIT_HALFSTEP_SOA);
2897 #ifdef SOA_SIMPLIFY_PARAMS 2899 const float * __restrict mass =
patch->patchDataSOA.
mass;
2900 const double * __restrict vel_x =
patch->patchDataSOA.
vel_x;
2901 const double * __restrict vel_y =
patch->patchDataSOA.
vel_y;
2902 const double * __restrict vel_z =
patch->patchDataSOA.
vel_z;
// Accumulate mass[i]*|v|^2 into kineticEnergy (halved below) and the full
// 3x3 mass-weighted velocity outer product into `virial` for every atom.
2908 for (
int i=0; i < numAtoms; i++) {
2910 kineticEnergy += mass[i] *
2911 (vel_x[i]*vel_x[i] + vel_y[i]*vel_y[i] + vel_z[i]*vel_z[i]);
// virial_ab += m * v_a * v_b, all nine tensor components.
2913 virial.
xx += mass[i] * vel_x[i] * vel_x[i];
2914 virial.
xy += mass[i] * vel_x[i] * vel_y[i];
2915 virial.
xz += mass[i] * vel_x[i] * vel_z[i];
2916 virial.
yx += mass[i] * vel_y[i] * vel_x[i];
2917 virial.
yy += mass[i] * vel_y[i] * vel_y[i];
2918 virial.
yz += mass[i] * vel_y[i] * vel_z[i];
2919 virial.
zx += mass[i] * vel_z[i] * vel_x[i];
2920 virial.
zy += mass[i] * vel_z[i] * vel_y[i];
2921 virial.
zz += mass[i] * vel_z[i] * vel_z[i];
// One 0.5 is the 1/2 in (1/2) m v^2; the second 0.5 presumably averages
// the two half-step estimates -- TODO confirm against the full source.
2923 kineticEnergy *= 0.5 * 0.5;
// Internal (relative-to-group-COM) kinetic energy and virial, accumulated
// per hydrogen group. NOTE(review): this excerpt is elided -- the m_cm
// accumulation and several braces are not visible here; atoms of a group
// appear to be contiguous with hydrogenGroupSize[i] read at the group head.
2934 for (
int i=0; i < numAtoms; i += hgs) {
2937 hgs = hydrogenGroupSize[i];
// First inner pass: accumulate the group's mass-weighted velocity.
2942 for (
int j = i; j < (i+hgs); j++) {
2944 v_cm_x += mass[j] * vel_x[j];
2945 v_cm_y += mass[j] * vel_y[j];
2946 v_cm_z += mass[j] * vel_z[j];
// Normalize by the group's total mass to get the COM velocity.
2948 BigReal recip_m_cm = 1.0 / m_cm;
2949 v_cm_x *= recip_m_cm;
2950 v_cm_y *= recip_m_cm;
2951 v_cm_z *= recip_m_cm;
// Second inner pass: per-atom velocity deviation from the group COM.
2953 for (
int j = i; j < (i+hgs); j++) {
2954 BigReal dv_x = vel_x[j] - v_cm_x;
2955 BigReal dv_y = vel_y[j] - v_cm_y;
2956 BigReal dv_z = vel_z[j] - v_cm_z;
2958 intKineticEnergy += mass[j] *
2959 (vel_x[j] * dv_x + vel_y[j] * dv_y + vel_z[j] * dv_z);
// intVirialNormal_ab += m * v_a * dv_b, all nine components.
2961 intVirialNormal.
xx += mass[j] * vel_x[j] * dv_x;
2962 intVirialNormal.
xy += mass[j] * vel_x[j] * dv_y;
2963 intVirialNormal.
xz += mass[j] * vel_x[j] * dv_z;
2964 intVirialNormal.
yx += mass[j] * vel_y[j] * dv_x;
2965 intVirialNormal.
yy += mass[j] * vel_y[j] * dv_y;
2966 intVirialNormal.
yz += mass[j] * vel_y[j] * dv_z;
2967 intVirialNormal.
zx += mass[j] * vel_z[j] * dv_x;
2968 intVirialNormal.
zy += mass[j] * vel_z[j] * dv_y;
2969 intVirialNormal.
zz += mass[j] * vel_z[j] * dv_z;
// Same 0.25 factor as the total kinetic energy above; the virial gets 0.5.
2972 intKineticEnergy *= 0.5 * 0.5;
2973 intVirialNormal *= 0.5;
// Fragment of a reduction-item accumulation; its target is elided here.
2975 += intKineticEnergy;
2987 const int * __restrict hydrogenGroupSize,
2988 const float * __restrict mass,
2989 const double * __restrict pos_x,
2990 const double * __restrict pos_y,
2991 const double * __restrict pos_z,
2992 const double * __restrict vel_x,
2993 const double * __restrict vel_y,
2994 const double * __restrict vel_z,
2995 const double * __restrict f_normal_x,
2996 const double * __restrict f_normal_y,
2997 const double * __restrict f_normal_z,
2998 const double * __restrict f_nbond_x,
2999 const double * __restrict f_nbond_y,
3000 const double * __restrict f_nbond_z,
3001 const double * __restrict f_slow_x,
3002 const double * __restrict f_slow_y,
3003 const double * __restrict f_slow_z,
3008 NamdProfileEvent::SUBMIT_REDUCTIONS_SOA);
3009 #ifdef SOA_SIMPLIFY_PARAMS 3011 const float * __restrict mass =
patch->patchDataSOA.
mass;
3012 const double * __restrict pos_x =
patch->patchDataSOA.
pos_x;
3013 const double * __restrict pos_y =
patch->patchDataSOA.
pos_y;
3014 const double * __restrict pos_z =
patch->patchDataSOA.
pos_z;
3015 const double * __restrict vel_x =
patch->patchDataSOA.
vel_x;
3016 const double * __restrict vel_y =
patch->patchDataSOA.
vel_y;
3017 const double * __restrict vel_z =
patch->patchDataSOA.
vel_z;
3018 const double * __restrict f_normal_x =
patch->patchDataSOA.
f_normal_x;
3019 const double * __restrict f_normal_y =
patch->patchDataSOA.
f_normal_y;
3020 const double * __restrict f_normal_z =
patch->patchDataSOA.
f_normal_z;
3021 const double * __restrict f_nbond_x =
patch->patchDataSOA.
f_nbond_x;
3022 const double * __restrict f_nbond_y =
patch->patchDataSOA.
f_nbond_y;
3023 const double * __restrict f_nbond_z =
patch->patchDataSOA.
f_nbond_z;
3024 const double * __restrict f_slow_x =
patch->patchDataSOA.
f_slow_x;
3025 const double * __restrict f_slow_y =
patch->patchDataSOA.
f_slow_y;
3026 const double * __restrict f_slow_z =
patch->patchDataSOA.
f_slow_z;
// Angular-momentum accumulators, taken about (origin_x, origin_y, origin_z)
// -- see the loop below.
3038 BigReal angularMomentum_x = 0;
3039 BigReal angularMomentum_y = 0;
3040 BigReal angularMomentum_z = 0;
// Single pass over all atoms: kinetic energy, linear momentum, and
// angular momentum r x (m v) with r measured from the origin.
3047 for (
int i=0; i < numAtoms; i++) {
3050 kineticEnergy += mass[i] *
3051 (vel_x[i]*vel_x[i] + vel_y[i]*vel_y[i] + vel_z[i]*vel_z[i]);
3054 momentum_x += mass[i] * vel_x[i];
3055 momentum_y += mass[i] * vel_y[i];
3056 momentum_z += mass[i] * vel_z[i];
// Position relative to the reference origin.
3059 BigReal dpos_x = pos_x[i] - origin_x;
3060 BigReal dpos_y = pos_y[i] - origin_y;
3061 BigReal dpos_z = pos_z[i] - origin_z;
// Cross product dpos x (m * vel), component by component.
3064 angularMomentum_x += mass[i] * (dpos_y*vel_z[i] - dpos_z*vel_y[i]);
3065 angularMomentum_y += mass[i] * (dpos_z*vel_x[i] - dpos_x*vel_z[i]);
3066 angularMomentum_z += mass[i] * (dpos_x*vel_y[i] - dpos_y*vel_x[i]);
// Apply the 1/2 of (1/2) m v^2.
3071 kineticEnergy *= 0.5;
3072 Vector momentum(momentum_x, momentum_y, momentum_z);
3073 Vector angularMomentum(angularMomentum_x, angularMomentum_y,
3088 for (
int i=0; i < numAtoms; i += hgs) {
3089 hgs = hydrogenGroupSize[i];
3098 for ( j = i; j < (i+hgs); ++j ) {
3100 r_cm_x += mass[j] * pos_x[j];
3101 r_cm_y += mass[j] * pos_y[j];
3102 r_cm_z += mass[j] * pos_z[j];
3103 v_cm_x += mass[j] * vel_x[j];
3104 v_cm_y += mass[j] * vel_y[j];
3105 v_cm_z += mass[j] * vel_z[j];
3116 for ( j = i; j < (i+hgs); ++j ) {
3130 intKineticEnergy += mass[j] *
3131 (v_x * dv_x + v_y * dv_y + v_z * dv_z);
3134 BigReal dr_x = pos_x[j] - r_cm_x;
3135 BigReal dr_y = pos_y[j] - r_cm_y;
3136 BigReal dr_z = pos_z[j] - r_cm_z;
3139 intVirialNormal.
xx += f_normal_x[j] * dr_x;
3140 intVirialNormal.
xy += f_normal_x[j] * dr_y;
3141 intVirialNormal.
xz += f_normal_x[j] * dr_z;
3142 intVirialNormal.
yx += f_normal_y[j] * dr_x;
3143 intVirialNormal.
yy += f_normal_y[j] * dr_y;
3144 intVirialNormal.
yz += f_normal_y[j] * dr_z;
3145 intVirialNormal.
zx += f_normal_z[j] * dr_x;
3146 intVirialNormal.
zy += f_normal_z[j] * dr_y;
3147 intVirialNormal.
zz += f_normal_z[j] * dr_z;
3150 intVirialNbond.
xx += f_nbond_x[j] * dr_x;
3151 intVirialNbond.
xy += f_nbond_x[j] * dr_y;
3152 intVirialNbond.
xz += f_nbond_x[j] * dr_z;
3153 intVirialNbond.
yx += f_nbond_y[j] * dr_x;
3154 intVirialNbond.
yy += f_nbond_y[j] * dr_y;
3155 intVirialNbond.
yz += f_nbond_y[j] * dr_z;
3156 intVirialNbond.
zx += f_nbond_z[j] * dr_x;
3157 intVirialNbond.
zy += f_nbond_z[j] * dr_y;
3158 intVirialNbond.
zz += f_nbond_z[j] * dr_z;
3161 intVirialSlow.
xx += f_slow_x[j] * dr_x;
3162 intVirialSlow.
xy += f_slow_x[j] * dr_y;
3163 intVirialSlow.
xz += f_slow_x[j] * dr_z;
3164 intVirialSlow.
yx += f_slow_y[j] * dr_x;
3165 intVirialSlow.
yy += f_slow_y[j] * dr_y;
3166 intVirialSlow.
yz += f_slow_y[j] * dr_z;
3167 intVirialSlow.
zx += f_slow_z[j] * dr_x;
3168 intVirialSlow.
zy += f_slow_z[j] * dr_y;
3169 intVirialSlow.
zz += f_slow_z[j] * dr_z;
3173 intKineticEnergy *= 0.5;
3198 NamdProfileEvent::SUBMIT_COLLECTIONS_SOA);
3213 if ( is_pos_needed || is_vel_needed ) {
3214 patch->copy_updates_to_AOS();
3223 patch->copy_forces_to_AOS();
3225 if ( is_pos_needed ) {
3228 if ( is_vel_needed ) {
3231 if ( is_f_needed ) {
3241 const double maxvel2
3244 const double * __restrict vel_x,
3245 const double * __restrict vel_y,
3246 const double * __restrict vel_z,
3251 #ifdef SOA_SIMPLIFY_PARAMS 3252 const double * __restrict vel_x =
patch->patchDataSOA.
vel_x;
3253 const double * __restrict vel_y =
patch->patchDataSOA.
vel_y;
3254 const double * __restrict vel_z =
patch->patchDataSOA.
vel_z;
3262 for (
int i=0; i < numAtoms; i++) {
3264 vel_x[i] * vel_x[i] + vel_y[i] * vel_y[i] + vel_z[i] * vel_z[i];
3265 killme = killme + ( vel2 > maxvel2 );
3272 for (
int i=0; i < numAtoms; i++) {
3274 vel_x[i] * vel_x[i] + vel_y[i] * vel_y[i] + vel_z[i] * vel_z[i];
3275 if (vel2 > maxvel2) {
3277 const Vector vel(vel_x[i], vel_y[i], vel_z[i]);
3278 const BigReal maxvel = sqrt(maxvel2);
3280 iout <<
iERROR <<
"Atom " << (a[i].
id + 1) <<
" velocity is " 3283 << i <<
" of " << numAtoms <<
" on patch " 3288 "Atoms moving too fast; simulation has become unstable (" 3290 <<
" pe " << CkMyPe() <<
").\n" <<
endi;
3301 const float * __restrict langevinParam,
3302 double * __restrict vel_x,
3303 double * __restrict vel_y,
3304 double * __restrict vel_z,
3309 NamdProfileEvent::LANGEVIN_VELOCITIES_BBK1_SOA);
3310 #ifdef SOA_SIMPLIFY_PARAMS 3312 double * __restrict vel_x =
patch->patchDataSOA.
vel_x;
3313 double * __restrict vel_y =
patch->patchDataSOA.
vel_y;
3314 double * __restrict vel_z =
patch->patchDataSOA.
vel_z;
3330 for (
int i=0; i < numAtoms; i++) {
3331 BigReal dt_gamma = dt * langevinParam[i];
3334 BigReal scaling = 1. - 0.5 * dt_gamma;
3335 vel_x[i] *= scaling;
3336 vel_y[i] *= scaling;
3337 vel_z[i] *= scaling;
3347 const float * __restrict langevinParam,
3348 const float * __restrict langScalVelBBK2,
3349 const float * __restrict langScalRandBBK2,
3350 float * __restrict gaussrand_x,
3351 float * __restrict gaussrand_y,
3352 float * __restrict gaussrand_z,
3353 double * __restrict vel_x,
3354 double * __restrict vel_y,
3355 double * __restrict vel_z,
3361 NamdProfileEvent::LANGEVIN_VELOCITIES_BBK2_SOA);
3362 #ifdef SOA_SIMPLIFY_PARAMS 3369 double * __restrict vel_x =
patch->patchDataSOA.
vel_x;
3370 double * __restrict vel_y =
patch->patchDataSOA.
vel_y;
3371 double * __restrict vel_z =
patch->patchDataSOA.
vel_z;
3399 for (
int i=0; i < numAtoms; i++) {
3402 gaussrand_x[i] = float(rg.
x);
3403 gaussrand_y[i] = float(rg.
y);
3404 gaussrand_z[i] = float(rg.
z);
3415 for (
int i=0; i < numAtoms; i++) {
3416 vel_x[i] += gaussrand_x[i] * langScalRandBBK2[i];
3417 vel_y[i] += gaussrand_y[i] * langScalRandBBK2[i];
3418 vel_z[i] += gaussrand_z[i] * langScalRandBBK2[i];
3419 vel_x[i] *= langScalVelBBK2[i];
3420 vel_y[i] *= langScalVelBBK2[i];
3421 vel_z[i] *= langScalVelBBK2[i];
3428 const int * __restrict hydrogenGroupSize,
3429 const float * __restrict mass,
3430 double * __restrict pos_x,
3431 double * __restrict pos_y,
3432 double * __restrict pos_z,
3437 #ifdef SOA_SIMPLIFY_PARAMS 3439 const float * __restrict mass =
patch->patchDataSOA.
mass;
3440 double * __restrict pos_x =
patch->patchDataSOA.
pos_x;
3441 double * __restrict pos_y =
patch->patchDataSOA.
pos_y;
3442 double * __restrict pos_z =
patch->patchDataSOA.
pos_z;
3461 for (
int i = 0; i < numAtoms; i += hgs) {
3463 hgs = hydrogenGroupSize[i];
3470 for ( j = i; j < (i+hgs); ++j ) {
3472 r_cm_x += mass[j] * pos_x[j];
3473 r_cm_y += mass[j] * pos_y[j];
3474 r_cm_z += mass[j] * pos_z[j];
3482 double tx = r_cm_x - origin.
x;
3483 double ty = r_cm_y - origin.
y;
3484 double tz = r_cm_z - origin.
z;
3486 double new_r_cm_x = factor.
xx*tx + factor.
xy*ty + factor.
xz*tz;
3487 double new_r_cm_y = factor.
yx*tx + factor.
yy*ty + factor.
yz*tz;
3488 double new_r_cm_z = factor.
zx*tx + factor.
zy*ty + factor.
zz*tz;
3490 new_r_cm_x += origin.
x;
3491 new_r_cm_y += origin.
y;
3492 new_r_cm_z += origin.
z;
3494 double delta_r_cm_x = new_r_cm_x - r_cm_x;
3495 double delta_r_cm_y = new_r_cm_y - r_cm_y;
3496 double delta_r_cm_z = new_r_cm_z - r_cm_z;
3498 for (j = i; j < (i+hgs); ++j) {
3499 pos_x[j] += delta_r_cm_x;
3500 pos_y[j] += delta_r_cm_y;
3501 pos_z[j] += delta_r_cm_z;
3505 for (
int i = 0; i < numAtoms; ++i) {
3509 double tx = pos_x[i] - origin.
x;
3510 double ty = pos_y[i] - origin.
y;
3511 double tz = pos_z[i] - origin.
z;
3513 double ftx = factor.
xx*tx + factor.
xy*ty + factor.
xz*tz;
3514 double fty = factor.
yx*tx + factor.
yy*ty + factor.
yz*tz;
3515 double ftz = factor.
zx*tx + factor.
zy*ty + factor.
zz*tz;
3517 pos_x[i] = ftx + origin.
x;
3518 pos_y[i] = fty + origin.
y;
3519 pos_z[i] = ftz + origin.
z;
3527 const int * __restrict hydrogenGroupSize,
3528 const float * __restrict mass,
3529 double * __restrict pos_x,
3530 double * __restrict pos_y,
3531 double * __restrict pos_z,
3532 double * __restrict vel_x,
3533 double * __restrict vel_y,
3534 double * __restrict vel_z,
3540 #ifdef SOA_SIMPLIFY_PARAMS 3542 const float * __restrict mass =
patch->patchDataSOA.
mass;
3543 double * __restrict pos_x =
patch->patchDataSOA.
pos_x;
3544 double * __restrict pos_y =
patch->patchDataSOA.
pos_y;
3545 double * __restrict pos_z =
patch->patchDataSOA.
pos_z;
3546 double * __restrict vel_x =
patch->patchDataSOA.
vel_x;
3547 double * __restrict vel_y =
patch->patchDataSOA.
vel_y;
3548 double * __restrict vel_z =
patch->patchDataSOA.
vel_z;
3570 for (
int i=0; i < numAtoms; i += hgs) {
3572 hgs = hydrogenGroupSize[i];
3581 for ( j = i; j < (i+hgs); ++j ) {
3583 r_cm_x += mass[j] * pos_x[j];
3584 r_cm_y += mass[j] * pos_y[j];
3585 r_cm_z += mass[j] * pos_z[j];
3586 v_cm_x += mass[j] * vel_x[j];
3587 v_cm_y += mass[j] * vel_y[j];
3588 v_cm_z += mass[j] * vel_z[j];
3595 double tx = r_cm_x - origin.
x;
3596 double ty = r_cm_y - origin.
y;
3597 double tz = r_cm_z - origin.
z;
3598 double new_r_cm_x = factor.
xx*tx + factor.
xy*ty + factor.
xz*tz;
3599 double new_r_cm_y = factor.
yx*tx + factor.
yy*ty + factor.
yz*tz;
3600 double new_r_cm_z = factor.
zx*tx + factor.
zy*ty + factor.
zz*tz;
3601 new_r_cm_x += origin.
x;
3602 new_r_cm_y += origin.
y;
3603 new_r_cm_z += origin.
z;
3605 double delta_r_cm_x = new_r_cm_x - r_cm_x;
3606 double delta_r_cm_y = new_r_cm_y - r_cm_y;
3607 double delta_r_cm_z = new_r_cm_z - r_cm_z;
3611 double delta_v_cm_x = ( velFactor_x - 1 ) * v_cm_x;
3612 double delta_v_cm_y = ( velFactor_y - 1 ) * v_cm_y;
3613 double delta_v_cm_z = ( velFactor_z - 1 ) * v_cm_z;
3614 for (j = i; j < (i+hgs); j++) {
3615 pos_x[j] += delta_r_cm_x;
3616 pos_y[j] += delta_r_cm_y;
3617 pos_z[j] += delta_r_cm_z;
3618 vel_x[j] += delta_v_cm_x;
3619 vel_y[j] += delta_v_cm_y;
3620 vel_z[j] += delta_v_cm_z;
3629 for (
int i=0; i < numAtoms; i++) {
3630 double tx = pos_x[i] - origin.
x;
3631 double ty = pos_y[i] - origin.
y;
3632 double tz = pos_z[i] - origin.
z;
3633 double ftx = factor.
xx*tx + factor.
xy*ty + factor.
xz*tz;
3634 double fty = factor.
yx*tx + factor.
yy*ty + factor.
yz*tz;
3635 double ftz = factor.
zx*tx + factor.
zy*ty + factor.
zz*tz;
3636 pos_x[i] = ftx + origin.
x;
3637 pos_y[i] = fty + origin.
y;
3638 pos_z[i] = ftz + origin.
z;
3639 vel_x[i] *= velFactor_x;
3640 vel_y[i] *= velFactor_y;
3641 vel_z[i] *= velFactor_z;
3659 Tensor *vp = ( pressure ? &virial : 0 );
3663 "Constraint failure; simulation has become unstable.\n" <<
endi;
3674 #if (defined(NAMD_CUDA) || defined(NAMD_HIP)) || defined(NAMD_MIC) 3689 #if defined(NTESTPID) 3693 double *xyzq =
new double[4*numAtoms];
3698 for (
int i=0; i < numAtoms; i++) {
3704 char fname[128], remark[128];
3705 sprintf(fname,
"xyzq_soa_pid%d_step%d.bin", NTESTPID, step);
3706 sprintf(remark,
"SOA xyzq, patch %d, step %d", NTESTPID, step);
3707 TestArray_write<double>(fname, remark, xyzq, 4*numAtoms);
3712 patch->zero_global_forces_SOA();
3716 int basePriority = ( (seq & 0xffff) << 15 )
3727 #ifdef NODEGROUP_FORCE_REGISTER 3729 patch->copy_forces_to_SOA();
3732 patch->copy_forces_to_SOA();
3735 #if defined(NTESTPID) 3741 double *fxyz =
new double[3*numAtoms];
3745 for (
int i=0; i < numAtoms; i++) {
3747 fxyz[3*i+1] = fy[i];
3748 fxyz[3*i+2] = fz[i];
3750 sprintf(fname,
"fxyz_normal_soa_pid%d_step%d.bin", NTESTPID, step);
3751 sprintf(remark,
"SOA fxyz normal, patch %d, step %d", NTESTPID, step);
3752 TestArray_write<double>(fname, remark, fxyz, 3*numAtoms);
3756 for (
int i=0; i < numAtoms; i++) {
3758 fxyz[3*i+1] = fy[i];
3759 fxyz[3*i+2] = fz[i];
3761 sprintf(fname,
"fxyz_nbond_soa_pid%d_step%d.bin", NTESTPID, step);
3762 sprintf(remark,
"SOA fxyz nonbonded, patch %d, step %d", NTESTPID, step);
3763 TestArray_write<double>(fname, remark, fxyz, 3*numAtoms);
3767 for (
int i=0; i < numAtoms; i++) {
3769 fxyz[3*i+1] = fy[i];
3770 fxyz[3*i+2] = fz[i];
3772 sprintf(fname,
"fxyz_slow_soa_pid%d_step%d.bin", NTESTPID, step);
3773 sprintf(remark,
"SOA fxyz slow, patch %d, step %d", NTESTPID, step);
3774 TestArray_write<double>(fname, remark, fxyz, 3*numAtoms);
3782 double *fxyz =
new double[3*numAtoms];
3783 double *fx, *fy, *fz;
3784 char fname[64], remark[128];
3790 for (
int i=0; i < numAtoms; i++) {
3792 fxyz[3*i+1] = fy[i];
3793 fxyz[3*i+2] = fz[i];
3795 sprintf(fname,
"fslow_soa_%d.bin", step);
3796 sprintf(remark,
"SOA slow forces, step %d\n", step);
3797 TestArray_write<double>(fname, remark, fxyz, 3*numAtoms);
3802 for (
int i=0; i < numAtoms; i++) {
3804 fxyz[3*i+1] = fy[i];
3805 fxyz[3*i+2] = fz[i];
3807 sprintf(fname,
"fnbond_soa_%d.bin", step);
3808 sprintf(remark,
"SOA nonbonded forces, step %d\n", step);
3809 TestArray_write<double>(fname, remark, fxyz, 3*numAtoms);
3814 for (
int i=0; i < numAtoms; i++) {
3816 fxyz[3*i+1] = fy[i];
3817 fxyz[3*i+2] = fz[i];
3819 sprintf(fname,
"fnormal_soa_%d.bin", step);
3820 sprintf(remark,
"SOA normal forces, step %d\n", step);
3821 TestArray_write<double>(fname, remark, fxyz, 3*numAtoms);
3830 fprintf(stderr,
"CPU force arrays for alanin\n" );
3832 fprintf(stderr,
"f[%i] = %lf %lf %lf | %lf %lf %lf | %lf %lf %lf\n", i,
3861 double * __restrict vel_x =
patch->patchDataSOA.
vel_x;
3862 double * __restrict vel_y =
patch->patchDataSOA.
vel_y;
3863 double * __restrict vel_z =
patch->patchDataSOA.
vel_z;
3867 DebugM(4,
"stochastically rescaling velocities at step " << step <<
" by " << velrescaling <<
"\n");
3868 for (
int i = 0; i < numAtoms; ++i ) {
3869 vel_x[i] *= velrescaling;
3870 vel_y[i] *= velrescaling;
3871 vel_z[i] *= velrescaling;
3882 #endif // SEQUENCER_SOA 3889 char tracePrefix[20];
3899 #ifdef TIMER_COLLECTION 3900 TimerSet& t =
patch->timerSet;
3936 const BigReal nbondstep = timestep * (staleForces?1:nonbondedFrequency);
3938 doNonbonded = (step >= numberOfSteps) || !(step%nonbondedFrequency);
3945 if ( dofull )
slowFreq = fullElectFrequency;
3946 const BigReal slowstep = timestep * (staleForces?1:fullElectFrequency);
3948 doFullElectrostatics = (dofull && ((step >= numberOfSteps) || !(step%fullElectFrequency)));
3963 if ( accelMDOn && (accelMDdihe || accelMDdual)) maxForceUsed =
Results::amdf;
3993 int doGlobal = doTcl || doColvars;
3999 #if defined(NAMD_CUDA) || defined(NAMD_HIP) 4027 if ( !commOnly && ( reassignFreq>0 ) && ! (step%reassignFreq) ) {
4032 doEnergy = ! ( step % energyFrequency );
4034 if ( accelMDOn && !accelMDdihe ) doEnergy=1;
4036 if ( adaptTempOn ) doEnergy=1;
4039 D_MSG(
"runComputeObjects()");
4047 if ( staleForces || doGlobal ) {
4053 D_MSG(
"newtonianVelocities()");
4067 D_MSG(
"submitHalfstep()");
4073 D_MSG(
"newtonianVelocities()");
4086 D_MSG(
"submitHalfstep()");
4090 if ( zeroMomentum && doFullElectrostatics )
submitMomentum(step);
4092 D_MSG(
"newtonianVelocities()");
4099 D_MSG(
"submitReductions()");
4107 sprintf(traceNote,
"%s%d",tracePrefix,step);
4108 traceUserSuppliedNote(traceNote);
4116 bool doMultigratorRattle =
false;
4130 #if defined(NAMD_NVTX_ENABLED) || defined(NAMD_CMK_TRACE_ENABLED) || defined(NAMD_ROCTX_ENABLED) 4134 int beginStep =
simParams->beginEventStep;
4139 for ( ++step; step <= numberOfSteps; ++step )
4141 #if defined(NAMD_NVTX_ENABLED) || defined(NAMD_CMK_TRACE_ENABLED) || defined(NAMD_ROCTX_ENABLED) 4142 eon = epid && (beginStep < step && step <= endStep);
4144 if (controlProfiling && step == beginStep) {
4147 if (controlProfiling && step == endStep) {
4154 DebugM(3,
"for step "<<step<<
" dGlobal " << doGlobal<<
"\n"<<
endi);
4165 newtonianVelocities(0.5,timestep,nbondstep,slowstep,staleForces,doNonbonded,doFullElectrostatics);
4170 if (doMultigratorRattle)
rattle1(timestep, 1);
4223 #endif // UPPER_BOUND 4225 doNonbonded = !(step%nonbondedFrequency);
4226 doFullElectrostatics = (dofull && !(step%fullElectFrequency));
4233 if ( zeroMomentum && doFullElectrostatics ) {
4252 if ( accelMDOn && (accelMDdihe || accelMDdual)) maxForceUsed =
Results::amdf;
4255 doEnergy = ! ( step % energyFrequency );
4256 if ( accelMDOn && !accelMDdihe ) doEnergy=1;
4257 if ( adaptTempOn ) doEnergy=1;
4281 if ( staleForces || doGlobal ) {
4287 if ( !commOnly && ( reassignFreq>0 ) && ! (step%reassignFreq) ) {
4289 newtonianVelocities(-0.5,timestep,nbondstep,slowstep,staleForces,doNonbonded,doFullElectrostatics);
4298 newtonianVelocities(1.0,timestep,nbondstep,slowstep,staleForces,doNonbonded,doFullElectrostatics);
4318 if ( zeroMomentum && doFullElectrostatics )
submitMomentum(step);
4322 newtonianVelocities(-0.5,timestep,nbondstep,slowstep,staleForces,doNonbonded,doFullElectrostatics);
4361 if(step == bstep || step == estep){
4366 #ifdef MEASURE_NAMD_WITH_PAPI 4368 int bstep =
simParams->papiMeasureStartStep;
4369 int estep = bstep +
simParams->numPapiMeasureSteps;
4370 if(step == bstep || step==estep) {
4371 papiMeasureBarrier(step);
4378 sprintf(traceNote,
"%s%d",tracePrefix,step);
4379 traceUserSuppliedNote(traceNote);
4381 #endif // UPPER_BOUND 4386 cycleBarrier(dofull && !((step+1)%fullElectFrequency),step);
4390 if(step == START_HPM_STEP)
4391 (CProxy_Node(CkpvAccess(BOCclass_group).node)).startHPM();
4393 if(step == STOP_HPM_STEP)
4394 (CProxy_Node(CkpvAccess(BOCclass_group).node)).stopHPM();
4400 #ifdef TIMER_COLLECTION 4402 printf(
"Timer collection reporting in microseconds for " 4406 #endif // TIMER_COLLECTION 4420 Vector movDragVel, dragIncrement;
4421 for (
int i = 0; i < numAtoms; ++i )
4427 dragIncrement = movDragGlobVel * movDragVel * dt;
4441 Vector rotDragAxis, rotDragPivot, dragIncrement;
4442 for (
int i = 0; i < numAtoms; ++i )
4448 dAngle = rotDragGlobVel * rotDragVel * dt;
4449 rotDragAxis /= rotDragAxis.
length();
4450 atomRadius = atom[i].
position - rotDragPivot;
4451 dragIncrement = cross(rotDragAxis, atomRadius) * dAngle;
4465 #if 0 && defined(NODEGROUP_FORCE_REGISTER) 4468 const int stepsPerCycle_save = stepsPerCycle;
4484 doFullElectrostatics = dofull;
4506 int doGlobal = doTcl || doColvars;
4525 #ifdef DEBUG_MINIMIZE 4526 printf(
"doTcl = %d doColvars = %d\n", doTcl, doColvars);
4532 #ifdef DEBUG_MINIMIZE 4533 else { printf(
"No computeGlobal\n"); }
4543 for ( ++step; step <= numberOfSteps; ++step ) {
4586 patch->copy_atoms_to_SOA();
4590 #if 0 && defined(NODEGROUP_FORCE_REGISTER) 4608 for (
int i = 0; i < numAtoms; ++i ) {
4614 for (
int j=1; j<hgs; ++j ) {
4632 for (
int i = 0; i < numAtoms; ++i ) {
4635 if ( drudeHardWallOn && i && (0.05 < a[i].mass) && ((a[i].mass < 1.0)) ) {
4638 if ( fixedAtomsOn && a[i].atomFixed ) a[i].
velocity = 0;
4640 if ( v2 > maxv2 ) maxv2 = v2;
4646 for (
int i = 0; i < numAtoms; ++i ) {
4647 if ( drudeHardWallOn && i && (0.05 < a[i].mass) && ((a[i].mass < 1.0)) ) {
4650 if ( fixedAtomsOn && a[i].atomFixed ) a[i].
velocity = 0;
4652 if ( v2 > maxv2 ) maxv2 = v2;
4662 for (
int i = 0; i < numAtoms; i += hgs ) {
4665 if ( minChildVel < fmax2 )
continue;
4666 int adjustChildren = 1;
4667 for (
int j = i+1; j < (i+hgs); ++j ) {
4668 if ( a[j].velocity.length2() > minChildVel ) adjustChildren = 0;
4670 if ( adjustChildren ) {
4672 for (
int j = i+1; j < (i+hgs); ++j ) {
4673 if (a[i].mass < 0.01)
continue;
4687 for (
int i = 0; i < numAtoms; ++i ) {
4692 for (
int i = 1; i < numAtoms; ++i ) {
4693 if ( (0.05 < a[i].mass) && ((a[i].mass < 1.0)) ) {
4702 for (
int i = 1; i < numAtoms; ++i ) {
4703 if ( (0.05 < a[i].mass) && ((a[i].mass < 1.0)) ) {
4715 for (
int i = 0; i < numAtoms; ++i ) {
4728 for (
int i = 0; i < numAtoms; ++i ) {
4733 for (
int i = 0; i < numAtoms; ++i ) {
4749 NAMD_die(
"Cannot zero momentum when fixed atoms are present.");
4760 for (
int i = 0; i < numAtoms; ++i ) {
4761 a[i].velocity += dv * a[i].recipMass;
4762 a[i].position += dx * a[i].recipMass;
4765 for (
int i = 0; i < numAtoms; ++i ) {
4766 a[i].velocity += dv;
4767 a[i].position += dx;
4779 NAMD_bug(
"Sequencer::scalePositionsVelocities, fixed atoms not implemented");
4783 for (
int i = 0; i < numAtoms; i += hgs ) {
4788 for (
int j=0;j < hgs;++j) {
4789 m_cm += a[i+j].
mass;
4798 for (
int j=0;j < hgs;++j) {
4804 for (
int i = 0; i < numAtoms; i++) {
4821 Tensor posScaleTensor = scaleTensor;
4850 for (
int i = 0; i < numAtoms; ++i ) {
4853 virialNormal.
outerAdd(a[i].mass, a[i].velocity, a[i].velocity);
4857 for (
int i = 0; i < numAtoms; ++i ) {
4858 if (a[i].mass < 0.01)
continue;
4860 virialNormal.
outerAdd(a[i].mass, a[i].velocity, a[i].velocity);
4864 kineticEnergy *= 0.5;
4872 Vector fixForceNormal = 0;
4873 Vector fixForceNbond = 0;
4876 calcFixVirial(fixVirialNormal, fixVirialNbond, fixVirialSlow, fixForceNormal, fixForceNbond, fixForceSlow);
4892 for (
int i = 0; i < numAtoms; i += hgs ) {
4898 for ( j = i; j < (i+hgs); ++j ) {
4909 NAMD_bug(
"Sequencer::multigratorPressure, this part needs to be implemented correctly");
4910 for ( j = i; j < (i+hgs); ++j ) {
4915 intVirialNormal2.
outerAdd (mass, v, dv);
4923 for ( j = i; j < (i+hgs); ++j ) {
4927 intVirialNormal2.
outerAdd(mass, v, dv);
4949 for (
int i = 0; i < numAtoms; i++) {
4960 for (
int i = 0; i < numAtoms; ++i ) {
4966 for (
int i = 0; i < numAtoms; ++i ) {
4970 kineticEnergy *= 0.5;
4971 return kineticEnergy;
4989 for (
int i = 0; i < numAtoms; i += hgs ) {
4995 for ( j = i; j < (i+hgs); ++j ) {
5000 momentumSqrSum.
outerAdd(1.0/m_cm, v_cm, v_cm);
5003 for (
int i = 0; i < numAtoms; i++) {
5004 momentumSqrSum.
outerAdd(a[i].mass, a[i].velocity, a[i].velocity);
5023 const int staleForces,
5024 const int doNonbonded,
5025 const int doFullElectrostatics)
5028 NamdProfileEvent::NEWTONIAN_VELOCITIES);
5031 if (staleForces || (doNonbonded && doFullElectrostatics)) {
5037 if (staleForces || doNonbonded)
5039 if (staleForces || doFullElectrostatics)
5066 for (
int i = 0; i < numAtoms; ++i )
5069 if ( ! dt_gamma )
continue;
5071 BigReal f1 = exp( -dt_gamma );
5072 BigReal f2 = sqrt( ( 1. - f1*f1 ) * kbT *
5084 NamdProfileEvent::LANGEVIN_VELOCITIES_BBK1);
5094 for (i = 0; i < numAtoms; i++) {
5096 if (i < numAtoms-1 &&
5097 a[i+1].mass < 1.0 && a[i+1].mass > 0.05) {
5109 if (dt_gamma != 0.0) {
5110 v_com *= ( 1. - 0.5 * dt_gamma );
5115 if (dt_gamma != 0.0) {
5116 v_bnd *= ( 1. - 0.5 * dt_gamma );
5127 if ( ! dt_gamma )
continue;
5129 a[i].
velocity *= ( 1. - 0.5 * dt_gamma );
5140 for ( i = 0; i < numAtoms; ++i )
5143 if ( ! dt_gamma )
continue;
5145 a[i].
velocity *= ( 1. - 0.5 * dt_gamma );
5157 NamdProfileEvent::LANGEVIN_VELOCITIES_BBK2);
5184 for (i = 0; i < numAtoms; i++) {
5186 if (i < numAtoms-1 &&
5187 a[i+1].mass < 1.0 && a[i+1].mass > 0.05) {
5199 if (dt_gamma != 0.0) {
5202 sqrt( 2 * dt_gamma * kbT *
5203 ( a[i].
partition ? tempFactor : 1.0 ) / mass );
5204 v_com /= ( 1. + 0.5 * dt_gamma );
5209 if (dt_gamma != 0.0) {
5212 sqrt( 2 * dt_gamma * kbT_bnd *
5213 ( a[i+1].
partition ? tempFactor : 1.0 ) / mass );
5214 v_bnd /= ( 1. + 0.5 * dt_gamma );
5225 if ( ! dt_gamma )
continue;
5228 sqrt( 2 * dt_gamma * kbT *
5229 ( a[i].
partition ? tempFactor : 1.0 ) * a[i].recipMass );
5230 a[i].
velocity /= ( 1. + 0.5 * dt_gamma );
5244 for ( i = 0; i < numAtoms; ++i )
5247 if ( ! dt_gamma )
continue;
5250 sqrt( 2 * dt_gamma * kbT *
5251 ( a[i].
partition ? tempFactor : 1.0 ) * a[i].recipMass );
5252 a[i].
velocity /= ( 1. + 0.5 * dt_gamma );
5276 for (
int i = 0; i < numAtoms; i += hgs ) {
5280 for ( j = i; j < (i+hgs); ++j ) {
5282 a[j].fixedPosition,a[j].transform);
5288 for ( j = i; j < (i+hgs); ++j ) {
5296 Position delta_x_cm = new_x_cm - x_cm;
5297 for ( j = i; j < (i+hgs); ++j ) {
5300 a[j].fixedPosition,a[j].transform);
5309 for (
int i = 0; i < numAtoms; ++i )
5313 a[i].fixedPosition,a[i].transform);
5345 for (
int i = 0; i < numAtoms; i += hgs ) {
5349 for ( j = i; j < (i+hgs); ++j ) {
5351 a[j].fixedPosition,a[j].transform);
5358 for ( j = i; j < (i+hgs); ++j ) {
5367 Position delta_x_cm = new_x_cm - x_cm;
5370 delta_v_cm.
x = ( velFactor.
x - 1 ) * v_cm.
x;
5371 delta_v_cm.
y = ( velFactor.
y - 1 ) * v_cm.
y;
5372 delta_v_cm.
z = ( velFactor.
z - 1 ) * v_cm.
z;
5373 for ( j = i; j < (i+hgs); ++j ) {
5376 a[j].fixedPosition,a[j].transform);
5387 for (
int i = 0; i < numAtoms; ++i )
5391 a[i].fixedPosition,a[i].transform);
5408 if ( rescaleFreq > 0 ) {
5415 for (
int i = 0; i < numAtoms; ++i )
5431 const BigReal factor_dihe = accelMDfactor[0];
5432 const BigReal factor_tot = accelMDfactor[1];
5436 NAMD_die(
"accelMD broadcasting error!\n");
5438 NAMD_die(
"accelMD broadcasting error!\n");
5441 for (
int i = 0; i < numAtoms; ++i)
5447 for (
int i = 0; i < numAtoms; ++i)
5450 for (
int i = 0; i < numAtoms; ++i)
5453 if (doFullElectrostatics) {
5454 for (
int i = 0; i < numAtoms; ++i)
5460 for (
int i = 0; i < numAtoms; ++i)
5471 if ( (step < simParams->adaptTempFirstStep ) ||
5486 if ( ( reassignFreq > 0 ) && ! ( step % reassignFreq ) ) {
5495 if ( newTemp < simParams->reassignHold )
5503 for (
int i = 0; i < numAtoms; ++i )
5507 sqrt(kbT * (a[i].
partition ? tempFactor : 1.0) * a[i].recipMass) *
5511 NAMD_bug(
"Sequencer::reassignVelocities called improperly!");
5525 for (
int i = 0; i < numAtoms; ++i )
5528 a[i].mass <= 0.) ?
Vector(0,0,0) :
5529 sqrt(kbT * (a[i].
partition ? tempFactor : 1.0) * a[i].recipMass) *
5542 for (
int i = 0; i < numAtoms; ++i )
5553 for (
int i = 0; i < numAtoms; ++i )
5565 BigReal sqrt_factor = sqrt(factor);
5568 for (
int i = 0; i < numAtoms; ++i ) {
5587 for (
int i = 0; i < numAtoms; ++i )
5589 BigReal f1 = exp( coefficient * a[i].langevinParam );
5607 DebugM(4,
"stochastically rescaling velocities at step " << step <<
" by " << velrescaling <<
"\n");
5608 for (
int i = 0; i < numAtoms; ++i ) {
5627 BigReal timestep,
const int ftag,
const int useSaved
5630 NamdProfileEvent::ADD_FORCE_TO_MOMENTUM);
5632 CmiNetworkProgressAfter (0);
5642 const BigReal timestep1,
const int ftag1,
const int useSaved1,
5643 const BigReal timestep2,
const int ftag2,
const int useSaved2,
5644 const BigReal timestep3,
const int ftag3,
const int useSaved3
5647 NamdProfileEvent::ADD_FORCE_TO_MOMENTUM);
5649 CmiNetworkProgressAfter (0);
5668 NamdProfileEvent::ADD_VELOCITY_TO_POSITION);
5670 CmiNetworkProgressAfter (0);
5681 Tensor *vp = ( pressure ? &virial : 0 );
5683 iout <<
iERROR <<
"Constraint failure in HardWallDrude(); " 5684 <<
"simulation may become unstable.\n" <<
endi;
5697 Tensor *vp = ( pressure ? &virial : 0 );
5700 "Constraint failure; simulation has become unstable.\n" <<
endi;
5705 printf(
"virial = %g %g %g %g %g %g %g %g %g\n",
5706 virial.
xx, virial.
xy, virial.
xz,
5707 virial.
yx, virial.
yy, virial.
yz,
5708 virial.
zx, virial.
zy, virial.
zz);
5715 printf(
"pos[%d] = %g %g %g\n", n,
5719 printf(
"vel[%d] = %g %g %g\n", n,
5724 printf(
"force[%d] = %g %g %g\n", n,
5758 const BigReal maxvel2 = maxvel * maxvel;
5759 for (
int i=0; i<numAtoms; ++i ) {
5760 if ( a[i].velocity.length2() > maxvel2 ) {
5767 const BigReal maxvel2 = maxvel * maxvel;
5769 for (
int i=0; i<numAtoms; ++i ) {
5774 for (
int i=0; i<numAtoms; ++i ) {
5775 if ( a[i].velocity.length2() > maxvel2 ) {
5777 iout <<
iERROR <<
"Atom " << (a[i].
id + 1) <<
" velocity is " 5780 << i <<
" of " << numAtoms <<
" on patch " 5785 "Atoms moving too fast; simulation has become unstable (" 5787 <<
" pe " << CkMyPe() <<
").\n" <<
endi;
5799 for (
int i=0; i<numAtoms; ++i ) {
5815 CmiNetworkProgressAfter (0);
5826 for (
int i = 0; i < numAtoms; ++i ) {
5829 virial.
outerAdd(a[i].mass, a[i].velocity, a[i].velocity);
5833 for (
int i = 0; i < numAtoms; ++i ) {
5834 if (a[i].mass < 0.01)
continue;
5836 virial.
outerAdd(a[i].mass, a[i].velocity, a[i].velocity);
5841 momentumSqrSum = virial;
5843 kineticEnergy *= 0.5 * 0.5;
5867 for (
int i=0; i<numAtoms; i += hgs) {
5874 for (j=i; j< i+hgs; ++j) {
5879 for (j=i; j < i+hgs; ++j) {
5881 if (! (useGroupPressure && j != i)) {
5883 int slab = (int)floor((z-zmin)*idz);
5884 if (slab < 0) slab += nslabs;
5885 else if (slab >= nslabs) slab -= nslabs;
5889 if (useGroupPressure) {
5912 for (
int i = 0; i < numAtoms; i += hgs ) {
5915 CmiNetworkProgress ();
5922 for ( j = i; j < (i+hgs); ++j ) {
5927 momentumSqrSum.
outerAdd(1.0/m_cm, v_cm, v_cm);
5932 for ( j = i; j < (i+hgs); ++j ) {
5937 intKineticEnergy += mass * (v * dv);
5938 intVirialNormal.
outerAdd (mass, v, dv);
5942 for ( j = i; j < (i+hgs); ++j ) {
5946 intKineticEnergy += mass * (v * dv);
5947 intVirialNormal.
outerAdd(mass, v, dv);
5951 intKineticEnergy *= 0.5 * 0.5;
5953 intVirialNormal *= 0.5;
5956 momentumSqrSum *= 0.5;
5969 for (
int j = 0; j < numAtoms; j++ ) {
5987 NamdProfileEvent::SUBMIT_REDUCTIONS);
5993 CmiNetworkProgressAfter(0);
6005 Vector angularMomentum = 0;
6010 for (i = 0; i < numAtoms; ++i ) {
6014 angularMomentum += cross(a[i].mass,a[i].position-o,a[i].velocity);
6018 for (i = 0; i < numAtoms; ++i ) {
6021 angularMomentum += cross(a[i].mass,a[i].position-o,a[i].velocity);
6027 for (i = 0; i < numAtoms; i++) {
6028 if (i < numAtoms-1 &&
6029 a[i+1].mass < 1.0 && a[i+1].mass > 0.05) {
6039 drudeComKE += m_com * v_com.
length2();
6040 drudeBondKE += m_bond * v_bond.length2();
6059 kineticEnergy *= 0.5;
6069 for (
int i = 0; i < numAtoms; ++i ) {
6076 for (
int i = 0; i < numAtoms; ++i ) {
6083 for (
int i = 0; i < numAtoms; ++i ) {
6099 for (
int i = 0; i < numAtoms; i += hgs ) {
6101 CmiNetworkProgress();
6108 for ( j = i; j < (i+hgs); ++j ) {
6118 for ( j = i; j < (i+hgs); ++j ) {
6120 ( pairInteractionSelf || a[j].
partition != 2 ) )
continue;
6122 if ( fixedAtomsOn && a[j].atomFixed )
continue;
6126 intKineticEnergy += mass * (v * dv);
6133 for ( j = i; j < (i+hgs); ++j ) {
6135 if ( fixedAtomsOn && a[j].atomFixed )
continue;
6139 intKineticEnergy += mass * (v * dv);
6148 intKineticEnergy *= 0.5;
6164 for (
int i=0; i<numAtoms; i += hgs) {
6169 for (j=i; j< i+hgs; ++j) {
6176 int slab = (int)floor((z-zmin)*idz);
6177 if (slab < 0) slab += nslabs;
6178 else if (slab >= nslabs) slab -= nslabs;
6180 int ppoffset = 3*(slab + nslabs*
partition);
6181 for (j=i; j < i+hgs; ++j) {
6187 BigReal wxx = (fnormal.
x + fnbond.
x + fslow.
x) * dx.
x;
6188 BigReal wyy = (fnormal.
y + fnbond.
y + fslow.
y) * dx.
y;
6189 BigReal wzz = (fnormal.
z + fnbond.
z + fslow.
z) * dx.
z;
6204 Vector fixForceNormal = 0;
6205 Vector fixForceNbond = 0;
6208 calcFixVirial(fixVirialNormal, fixVirialNbond, fixVirialSlow, fixForceNormal, fixForceNbond, fixForceSlow);
6211 auto printTensor = [](
const Tensor& t,
const std::string& name){
6212 CkPrintf(
"%s", name.c_str());
6213 CkPrintf(
"\n%12.5lf %12.5lf %12.5lf\n" 6214 "%12.5lf %12.5lf %12.5lf\n" 6215 "%12.5lf %12.5lf %12.5lf\n",
6220 printTensor(fixVirialNormal,
"fixVirialNormal = ");
6221 printTensor(fixVirialNbond,
"fixVirialNbond = ");
6222 printTensor(fixVirialSlow,
"fixVirialSlow = ");
6233 #endif // UPPER_BOUND 6250 const double drudeBondLen2 = drudeBondLen * drudeBondLen;
6252 const double drudeMove = 0.01;
6253 const double drudeStep2 = drudeStep * drudeStep;
6254 const double drudeMove2 = drudeMove * drudeMove;
6259 for (
int i = 0; i < numAtoms; ++i ) {
6262 printf(
"f1[%2d]= %f %f %f\n", i, f1[i].x, f1[i].y, f1[i].z);
6263 printf(
"f2[%2d]= %f %f %f\n", i, f2[i].x, f2[i].y, f2[i].z);
6266 f1[i] += f2[i] + f3[i];
6267 if ( drudeHardWallOn && i && (a[i].mass > 0.05) && ((a[i].mass < 1.0)) ) {
6268 if ( ! fixedAtomsOn || ! a[i].atomFixed ) {
6269 if ( drudeStep2 * f1[i].length2() > drudeMove2 ) {
6272 a[i].
position += drudeStep * f1[i];
6274 if ( (a[i].position - a[i-1].position).length2() > drudeBondLen2 ) {
6278 Vector netf = f1[i-1] + f1[i];
6279 if ( fixedAtomsOn && a[i-1].atomFixed ) netf = 0;
6283 if ( fixedAtomsOn && a[i].atomFixed ) f1[i] = 0;
6290 for (
int i = 0; i < numAtoms; ++i ) {
6293 if ( v2 > maxv2 ) maxv2 = v2;
6296 if ( v2 > maxv2 ) maxv2 = v2;
6307 for (
int i = 0; i < numAtoms; ++i ) {
6309 if ( drudeHardWallOn && (a[i].mass > 0.05) && ((a[i].mass < 1.0)) )
continue;
6318 BigReal fmult = 1.01 * sqrt(fmax2/ff);
6319 f *= fmult; ff = f * f;
6328 printf(
"fdotf = %f\n", fdotf);
6329 printf(
"fdotv = %f\n", fdotv);
6330 printf(
"vdotv = %f\n", vdotv);
6343 for (
int i = 0; i < numAtoms; i += hgs ) {
6348 for ( j = i; j < (i+hgs); ++j ) {
6353 for ( j = i; j < (i+hgs); ++j ) {
6373 Vector fixForceNormal = 0;
6374 Vector fixForceNbond = 0;
6377 calcFixVirial(fixVirialNormal, fixVirialNbond, fixVirialSlow, fixForceNormal, fixForceNbond, fixForceSlow);
6408 NamdProfileEvent::SUBMIT_COLLECTIONS);
6410 int dcdSelectionIndex;
6430 #if defined(NAMD_CUDA) || defined(NAMD_HIP) || defined(NAMD_MIC) 6462 int basePriority = ( (seq & 0xffff) << 15 )
6575 #ifdef NAMD_CUDA_XXX 6578 for (
int i=0; i<numAtoms; ++i ) {
6579 CkPrintf(
"%d %g %g %g\n", a[i].
id,
6589 CkPrintf(
"%d %g %g %g\n", a[i].
id,
6593 CkPrintf(
"%d %g %g %g\n", a[i].
id,
6597 CkPrintf(
"%d %g %g %g\n", a[i].
id,
6609 for (
int i=0; i<numAtoms; ++i ) {
6619 float fx = fxNo+fxNb+fxSl;
6620 float fy = fyNo+fyNb+fySl;
6621 float fz = fzNo+fzNb+fzSl;
6623 float f = sqrt(fx*fx+fy*fy+fz*fz);
6626 float x =
patch->
p[i].position.x;
6627 float y =
patch->
p[i].position.y;
6628 float z =
patch->
p[i].position.z;
6630 CkPrintf(
"FORCE(%04i)[%04i] = % .9e % .9e % .9e\n", seq,
id,
6665 #ifdef MEASURE_NAMD_WITH_PAPI 6666 void Sequencer::papiMeasureBarrier(
int step){
6668 broadcast->papiMeasureBarrier.get(step);
Real atomcharge(int anum) const
SubmitReduction * multigratorReduction
Vector gaussian_vector(void)
void rescaleVelocities(int)
void finishReduction(bool doEnergyVirial)
void minimizationQuenchVelocity(void)
int period
period for some step dependent event (e.g. stepsPerCycle)
NAMD_HOST_DEVICE void rescale(Tensor factor)
void max(int i, BigReal v)
int init(int initstep, int initperiod, int delta=0)
DCDParams dcdSelectionParams[16]
void barrier(const SynchronousCollectiveScope scope)
void tcoupleVelocities(BigReal, int)
void addMovDragToPosition(BigReal)
BigReal soluteScalingFactorCharge
void submitForces(int seq, FullAtomList &a, int maxForceUsed, ForceList *f, int prec)
virtual void algorithm(void)
void get_rotdrag_params(BigReal &v, Vector &a, Vector &p, int atomnum) const
void langevinVelocitiesBBK2_SOA(BigReal timestep)
#define NAMD_EVENT_STOP(eon, id)
Bool is_atom_movdragged(int atomnum) const
SubmitReduction * pressureProfileReduction
void minimize_rattle2(const BigReal, Tensor *virial, bool forces=false)
friend class SequencerCUDA
void scaleVelocities(const BigReal velScale)
void positionsReady_SOA(int doMigration=0)
#define GB1_COMPUTE_HOME_PRIORITY
void addVelocityToPosition(BigReal)
SubmitReduction * reduction
NAMD_HOST_DEVICE Vector c() const
SubmitReduction * min_reduction
std::shared_ptr< CudaGlobalMasterServer > getCudaGlobalMaster()
Bool is_atom_exPressure(int atomnum) const
SimpleBroadcastObject< int > traceBarrier
void maximumMove(BigReal)
Bool monteCarloPressureOn
const GlobalMasterIMD * getIMD()
void cycleBarrier(int, int)
void submitCollections_SOA(int step, int zeroVel=0)
Bool globalMasterScaleByFrequency
static void partition(int *order, const FullAtom *atoms, int begin, int end)
SimpleBroadcastObject< Vector > momentumCorrection
void addRotDragToPosition(BigReal)
static PatchMap * Object()
void saveForce(const int ftag=Results::normal)
void registerIDsFullAtom(const FullAtom *begin, const FullAtom *end)
void langevinVelocitiesBBK2(BigReal)
void monteCarloPressureControl(const int step, const int doMigration, const int doEnergy, const int doVirial, const int maxForceNumber, const int doGlobal)
virtual void submit(void)=0
#define ADD_TENSOR_OBJECT(R, RL, D)
SimParameters * simParameters
void addForceToMomentum(FullAtom *__restrict atom_arr, const Force *__restrict force_arr, const BigReal dt, int num_atoms) __attribute__((__noinline__))
void newMinimizeDirection(BigReal)
void newMinimizePosition(BigReal)
double stochRescaleCoefficient()
Bool CUDASOAintegrateMode
int rattle1(const BigReal, Tensor *virial, SubmitReduction *)
int nextstep
next step value
void gbisComputeAfterP2()
void startWork(const LDObjHandle &handle)
HomePatchList * homePatchList()
void langevinVelocitiesBBK1(BigReal)
std::ostream & endi(std::ostream &s)
char const *const NamdProfileEventStr[]
void updateDevicePatchMap(int startup)
int berendsenPressureFreq
SubmitReduction * willSubmit(int setID, int size=-1)
void rattle1(BigReal, int)
void saveTotalForces(HomePatch *)
SimpleBroadcastObject< BigReal > adaptTemperature
unsigned char get_ss_type(int anum) const
void rebalanceLoad(int timestep)
Bool globalMasterStaleForces
static ReductionMgr * Object(void)
void addForceToMomentum_SOA(const double scaling, double dt_normal, double dt_nbond, double dt_slow, int maxForceNumber)
void minimizeMoveDownhill(BigReal fmax2)
Patch * patch(PatchID pid)
void addForceToMomentum(BigReal, const int ftag=Results::normal, const int useSaved=0)
void submitReductions_SOA()
std::vector< PatchRecord > & getPatches()
static PatchMap * ObjectOnPe(int pe)
float * langScalVelBBK2
derived from langevinParam
void pauseWork(const LDObjHandle &handle)
SimpleBroadcastObject< BigReal > tcoupleCoefficient
int NAMD_gcd(int a, int b)
void exchangeCheckpoint(int scriptTask, int &bpc)
Molecule stores the structural information for the system.
void split(int iStream, int numStreams)
static NAMD_HOST_DEVICE Tensor identity(BigReal v1=1.0)
void addForceToMomentum3(const BigReal timestep1, const int ftag1, const int useSaved1, const BigReal timestep2, const int ftag2, const int useSaved2, const BigReal timestep3, const int ftag3, const int useSaved3)
void positionsReady(int doMigration=0)
void submitHalfstep_SOA()
SimpleBroadcastObject< BigReal > stochRescaleCoefficient
SimpleBroadcastObject< int > monteCarloBarostatAcceptance
void submitCollections(int step, int zeroVel=0)
void stochRescaleVelocities_SOA(int step)
static void print_vel_SOA(const double *vel_x, const double *vel_y, const double *vel_z, int ilo=0, int ihip1=1)
void runComputeObjects_SOA(int migration, int pairlists, int step)
BigReal calcKineticEnergy()
void adaptTempUpdate(int)
void positionsReady_GPU(int doMigration=0, int startup=0)
int32 * hydrogenGroupSize
#define TIMER_START(T, TYPE)
int rattle1_SOA(const BigReal, Tensor *virial, SubmitReduction *)
void calcFixVirial(Tensor &fixVirialNormal, Tensor &fixVirialNbond, Tensor &fixVirialSlow, Vector &fixForceNormal, Vector &fixForceNbond, Vector &fixForceSlow)
#define NAMD_PROFILE_START()
float * gaussrand_x
fill with Gaussian distributed random numbers
static __device__ __host__ __forceinline__ int computeAtomPad(const int numAtoms, const int tilesize=WARPSIZE)
int numPatches(void) const
static std::pair< int, int > coordinateNeeded(int timestep)
Check if the step requires to output the coordinates.
#define NAMD_EVENT_START(eon, id)
void stochRescaleVelocities(int)
void rattle1_SOA(BigReal, int)
#define COMPUTE_HOME_PRIORITY
void constructDevicePatchMap()
static void print_tensor(const Tensor &t)
NAMD_HOST_DEVICE BigReal length(void) const
NAMD_HOST_DEVICE Position apply_transform(Position data, const Transform &t) const
void NAMD_bug(const char *err_msg)
void multigratorPressure(int step, int callNumber)
static ComputeCUDAMgr * getComputeCUDAMgr()
void berendsenPressure(int)
void submitMomentum(int step)
int rescaleVelocities_numTemps
double * vel_x
Jim recommends double precision velocity.
void submitLoadStats(int timestep)
void mollyMollify(Tensor *virial)
void runComputeObjects(int migration=1, int pairlists=0, int pressureStep=0)
void rebalance(Sequencer *seq, PatchID id)
void rescaleaccelMD(int, int, int)
SimpleBroadcastObject< Tensor > velocityRescaleTensor2
SimpleBroadcastObject< int > IMDTimeEnergyBarrier
void clearDevicePatchMap()
NAMD_HOST_DEVICE BigReal length2(void) const
#define NAMD_EVENT_RANGE_2(eon, id)
SimpleBroadcastObject< int > scriptBarrier
bool getIsGlobalDevice() const
const_iterator const_begin(void) const
PatchID getPatchID() const
void scalePositionsVelocities(const Tensor &posScale, const Tensor &velScale)
int monteCarloPressureFreq
int getPesSharingDevice(const int i)
SimpleBroadcastObject< BigReal > velocityRescaleFactor2
Bool is_atom_rotdragged(int atomnum) const
void doMigrationGPU(const int startup, const int doGlobal, const int updatePatchMap)
void langevinPiston_SOA(int step)
void gbisComputeAfterP1()
void NAMD_die(const char *err_msg)
static LdbCoordinator * Object()
void gaussian_array_f(float *a, int n)
#define TIMER_INIT_WIDTH(T, TYPE, WIDTH)
int getForceSendActive() const
static int forceNeeded(int timestep)
Check if the step requires to output the forces.
int berendsenPressure_count
SimpleBroadcastObject< BigReal > velocityRescaleFactor
void publish(int tag, const T &t)
SimpleBroadcastObject< BigReal > minimizeCoefficient
void reassignVelocities(BigReal, int)
Bool LJPMESerialRealSpaceOn
void langevinVelocitiesBBK1_SOA(BigReal timestep)
SimpleBroadcastObject< Vector > accelMDRescaleFactor
int hardWallDrude(const BigReal, Tensor *virial, SubmitReduction *)
ComputeGlobal * computeGlobalObject
void saveForce(const int ftag=Results::normal)
void runComputeObjectsCUDA(int doMigration, int doGlobal, int pairlists, int nstep, int startup)
SimpleBroadcastObject< Tensor > positionRescaleFactor
void buildRattleList_SOA()
void langevinVelocities(BigReal)
IMDSessionInfo IMDsendsettings
static CollectionMaster * Object()
void hardWallDrude(BigReal, int)
NodeBroadcast * nodeBroadcast
#define TIMER_STOP(T, TYPE)
void multigratorTemperature(int step, int callNumber)
static constexpr int num_inline_peer
double * recipMass
derived from mass
void reinitVelocities(void)
int pressureProfileAtomTypes
int checkpoint_berendsenPressure_count
ControllerBroadcasts * broadcast
#define NAMD_EVENT_START_EX(eon, id, str)
void maximumMove_SOA(const double dt, const double maxvel2)
CollectionMgr *const collection
void updateDeviceData(const int startup, const int maxForceUsed, const int doGlobal)
#define GB2_COMPUTE_HOME_PRIORITY
#define NAMD_PROFILE_STOP()
Bool langevinGammasDiffer
int getNumStepsToRun(void)
void resetMovingAverage()
void newtonianVelocities(BigReal, const BigReal, const BigReal, const BigReal, const int, const int, const int)
static void print_vel_AOS(const FullAtom *a, int ilo=0, int ihip1=1)
void rescaleSoluteCharges(BigReal)
void addVelocityToPosition_SOA(const double dt)
void setVal(const NodeReduction *other)
#define SOA_SIMPLIFY_PARAMS
void submitVelocities(int seq, int zero, FullAtomList &a, int prec)
void submitMinimizeReductions(int, BigReal fmax2)
#define ADD_VECTOR_OBJECT(R, RL, D)
CudaComputeNonbonded * getCudaComputeNonbonded()
int multigratorPressureFreq
static int velocityNeeded(int timestep)
Check if the step requires to output the velocities.
int numPatchesOnNode(int node)
void submitPositions(int seq, FullAtomList &a, Lattice l, int prec, int dcdSelectionIndex)
void correctMomentum(int step, BigReal drifttime)
NAMD_HOST_DEVICE void outerAdd(BigReal scale, const Vector &v1, const Vector &v2)
std::ostream & iERROR(std::ostream &s)
void addForceToMomentum3(FullAtom *__restrict atom_arr, const Force *__restrict force_arr1, const Force *__restrict force_arr2, const Force *__restrict force_arr3, const BigReal dt1, const BigReal dt2, const BigReal dt3, int num_atoms) __attribute__((__noinline__))
void addVelocityToPosition(FullAtom *__restrict atom_arr, const BigReal dt, int num_atoms) __attribute__((__noinline__))
ForceList f[Results::maxNumForces]
float * langScalRandBBK2
from langevinParam and recipMass
void get_movdrag_params(Vector &v, int atomnum) const
static GlobalGPUMgr * Object()
void enableEarlyExit(void)
void submitReductions(int)
#define namd_reciprocal(x)
SimpleBroadcastObject< Tensor > positionRescaleFactor2
void integrate_CUDA_SOA(int scriptTask)
void loweAndersenFinish()
int getNumPesSharingDevice()
SimParameters *const simParams
SimpleBroadcastObject< Tensor > velocityRescaleTensor
NAMD_HOST_DEVICE Vector unit(void) const
__thread DeviceCUDA * deviceCUDA
NAMD_HOST_DEVICE Vector origin() const
virtual void atomUpdate()
void rescaleVelocitiesByFactor(BigReal)
int multigratorTemperatureFreq
int globalMasterFrequency
static SynchronousCollectives * Object()
CudaPmeOneDevice * createCudaPmeOneDevice()
#define PATCH_PRIORITY(PID)
CudaPmeOneDevice * getCudaPmeOneDevice()
void updatePatchOrder(const std::vector< CudaLocalRecord > &data)
void berendsenPressure_SOA(int step)
int32 numAtoms
number of atoms
void printDevicePatchMap()
void compute(const Lattice &lattice, int doEnergyVirial, int step)
void exchangeAtoms(int scriptTask)
T get(int tag, const int expected=-1)
Attempts to retrieve a previously published value for a given tag and id.