From: Ignacio Fernández Galván (jellby_at_yahoo.com)
Date: Wed Sep 15 2010 - 07:24:14 CDT

Nicolas SAPAY <nicolas.sapay_at_cermav.cnrs.fr> wrote:

> I need to calculate the average life time of H-bonds in a trajectory (or
> even better, the life-time probability distribution). Is there a script /
> function / plugin that could do that? I didn't find anything on the
> mailing list or the web site.

Just a couple of weeks ago I had to do that, this is what I used:

residence.tcl
--------------------------------------------------------
proc residence { mol sel_text filename } {
  set sel [atomselect $mol $sel_text]
  set n [molinfo $mol get numframes]
  set file [open $filename w]
  for { set i 0 } { $i < $n } { incr i } {
      $sel frame $i
      $sel update
      puts $file "[$sel list]"
      if { $i % 100 == 0 } { puts "$i / $n" }
  }
  close $file
}
-------------------------------------------------------------

If you include that in your VMD startup scripts, then you can use it as:

residence <mol> <sel_text> <filename>

where <mol> is the molecule you want to analyze (often 0), <sel_text> is the
text, between quotes, you want to use to select atoms (like "oxygen within 3.5
of index 23") and <filename> is an output filename. The output filename then
contains, for each frame, the indices of the selected atoms.

Then, I used this Fortran90 program to analyze the results (sorry, comments in
Spanish, beware of possible line-wraps):

residencia.f90
---------------------------------------------------------
! Calcula la probabilidad de supervivencia en función del tiempo, de donde
! por ajuste exponencial puede extraerse el tiempo medio de residencia.
! Según: J. Comput. Chem. 14 (1993) 1396
! y J. Phys. Chem. 87 (1983) 5071

PROGRAM Residencia
IMPLICIT NONE
CHARACTER(LEN=1024) :: Linea
LOGICAL, DIMENSION(:,:), ALLOCATABLE :: Mols
INTEGER, DIMENSION(:,:), ALLOCATABLE :: Seqs,Matriz
INTEGER, DIMENSION(:), ALLOCATABLE :: Indices
DOUBLE PRECISION, DIMENSION(:), ALLOCATABLE :: Supervivencia
INTEGER :: i,j,k,Error,NConf,NMol,NMax,MaxSeq,Short

! Exploración inicial para ver cuántas configuraciones y cuántas moléculas hay
OPEN(12,ACTION='READWRITE',STATUS='SCRATCH')
NConf=0
NMol=0
DO
  READ(5,'(A)',IOSTAT=Error) Linea
  IF (Error /= 0 ) EXIT
  NConf=NConf+1
  j=0
  DO
      Linea=ADJUSTL(Linea)
      READ(Linea,*,IOSTAT=Error) i
      IF (Error /= 0) EXIT
      j=j+1
      NMol=MAX(NMol,i)
      NMax=MAX(NMax,j)
      Linea=Linea(INDEX(Linea,' '):)
      WRITE(12,*) i
  END DO
END DO
REWIND(12)
! Se recalculan los indices de las moléculas
ALLOCATE(Indices(0:NMol))
j=0
Indices(:)=0
DO
  READ(12,*,IOSTAT=Error) i
  IF (Error /= 0) EXIT
  IF (Indices(i) /= 0) CYCLE
  j=j+1
  Indices(i)=j
END DO
NMol=j
CLOSE(12)

REWIND(5)

ALLOCATE(Mols(NConf,NMol),Seqs(NMol,NConf),Supervivencia(NConf))
Mols(:,:)=.FALSE.

! Se lee, para cada configuracion, qué moléculas están seleccionadas
DO i=1,NConf
  READ(5,'(A)') Linea
  DO
      Linea=ADJUSTL(Linea)
      READ(Linea,*,IOSTAT=Error) j
      IF (Error /= 0) EXIT
      Linea=Linea(INDEX(Linea,' '):)
      Mols(i,Indices(j))=.TRUE.
  END DO
END DO

! Se eliminan ausencias breves
Short=0 !numero de pasos consecutivos en que se permite la ausencia
DO i=1,NMol
  j=1
  DO WHILE (.NOT. Mols(j,i))
      j=j+1
  END DO
  k=0
  DO WHILE (j <= NConf)
      IF (Mols(j,i)) THEN
        IF ((k /= 0) .AND. (k <= Short)) Mols(j-k:j-1,i)=.TRUE.
        k=0
      ELSE
        k=k+1
    END IF
    j=j+1
  END DO
END DO

! Para cada molécula se calcula en cuántas secuencias completas de k
! configuraciones está, E(i)
Seqs(:,:)=0
MaxSeq=0
DO i=1,NMol
  k=0
  DO j=1,NConf
      IF (Mols(j,i)) THEN
          k=k+1
        ELSE
          IF (k /= 0) THEN
              Seqs(i,k)=Seqs(i,k)+1
              MaxSeq=MAX(MaxSeq,k)
          END IF
          k=0
      END IF
  END DO
  IF (k /= 0) Seqs(i,k)=Seqs(i,k)+1
END DO

! Y ahora se calculan las secuencias incompletas, B(i)
ALLOCATE(Matriz(MaxSeq,MaxSeq))
Matriz(:,:)=0
DO i=1,MaxSeq
  DO j=i,MaxSeq
      Matriz(j,i)=j-i+1
  END DO
END DO
Seqs(:,1:MaxSeq)=MATMUL(Seqs(:,1:MaxSeq),Matriz(:,:))

! Finalmente se imprimen los datos
DO i=1,MaxSeq
  Supervivencia(i)=SUM(Seqs(:,i))/DBLE(NConf-i+1)
  WRITE(6,*) i,Supervivencia(i),LOG(Supervivencia(i))
END DO

DEALLOCATE(Indices,Mols,Seqs,Supervivencia)

! Se pueden analizar con gnuplot:
! plot "fichero" u 1:2 w l, "fichero" u 1:3 w l
! f(x)=a-x/b; a=1; b=1
! fit [mix:max] f(x) "fichero" using 1:3 via a,b
! plot "fichero" u 1:3 w l, f(x)

END PROGRAM Residencia

---------------------------------------------------------------

Once compiled, use it as:

./residencia < input > output

where "input" is the filename resulting from the VMD script, and "output" is
your desired output file. The output contains, for each line: the timestep, the
number of atoms that remain selected for that number of timesteps (change
"Short" if you want to allow short excursions), and the logarithm of that.

I hope that helps.

Ignacio