/***************************************************************************
 *cr                                                                       
 *cr            (C) Copyright 1995 The Board of Trustees of the           
 *cr                        University of Illinois                       
 *cr                         All Rights Reserved                        
 *cr                                                                   
 ***************************************************************************/

/***************************************************************************
 * RCS INFORMATION:
 *
 *	$RCSfile: BaseMolecule.C,v $
 *	$Author: billh $	$Locker:  $		$State: Exp $
 *	$Revision: 1.22 $	$Date: 1995/07/01 22:45:54 $
 *
 ***************************************************************************
 * DESCRIPTION:
 *
 * Base class for all molecules, without display-specific information.  This
 * portion of a molecule contains the structural data, and all routines to
 * find the structure (backbone, residues, etc).  It does NOT contain the
 * animation list; that is maintained by Molecule (which is derived from
 * this class).
 *
 ***************************************************************************/

#include <ctype.h>
#include <stdlib.h>
#include "BaseMolecule.h"
#include "Atom.h"
#include "NameList.h"
#include "Inform.h"
#include "utilities.h"

// static variables for this class
// Class-wide counter used to hand out molecule ids: the constructor copies
// the current value into ID and then increments it, so ids start at 0 and
// increase by one for every BaseMolecule created.
int BaseMolecule::nextIDNumber = 0;

////////////////////////////  constructor

// Build an empty molecule.  The residue list is constructed with argument 10
// and the three fragment lists with argument 1; a fresh id is taken from the
// class-wide counter and every count is zeroed.  No atom storage exists
// until init_atoms() is called.
BaseMolecule::BaseMolecule(void)
  : residueList(10), fragList(1), pfragList(1), nfragList(1)
{
  // take the next free id, then advance the shared counter
  ID = nextIDNumber;
  nextIDNumber++;

  MSGDEBUG(1,"Creating new BaseMolecule " << ID << " ..." << sendmsg);

  // nothing has been read yet: no atom table and all counts at zero
  atomList = NULL;
  maxAtoms = 0;
  nAtoms = 0;
  nBonds = 0;
  nBackProtein = 0;
  nBackDNA = 0;
  nResidues = 0;
  nSegments = 0;
}


////////////////////////////   destructor

// Release everything this object allocated: each Atom, the atom pointer
// table, every Residue, and every Fragment held in the nucleic, protein and
// general fragment lists.
BaseMolecule::~BaseMolecule(void) {
  int k;

  MSGDEBUG(1,"Deleting BaseMolecule " << ID << " ..." << sendmsg);

  // the Atom objects first ...
  for (k = 0; k < nAtoms; k++)
    delete atomList[k];

  // ... then the table that pointed at them (only allocated if maxAtoms > 0)
  if (maxAtoms > 0)
    delete [] atomList;

  // residues
  k = 0;
  while (k < residueList.num()) {
    delete residueList[k];
    k++;
  }

  // nucleic-acid subfragments
  k = 0;
  while (k < nfragList.num()) {
    delete nfragList[k];
    k++;
  }

  // protein subfragments
  k = 0;
  while (k < pfragList.num()) {
    delete pfragList[k];
    k++;
  }

  // whole fragments
  k = 0;
  while (k < fragList.num()) {
    delete fragList[k];
    k++;
  }
}


///////////////////////  protected routines

// Allocate the table that will hold up to n Atom pointers.  Must be called
// once, before the first add_atom().  A request for n <= 0 is ignored
// silently; a second call after storage already exists is reported through
// msgErr and otherwise ignored.
void BaseMolecule::init_atoms(int n) {

  MSGDEBUG(2,"BaseMolecule:Initializing storage for " << n << " atoms ...");
  MSGDEBUG(2,sendmsg);

  if (n > 0) {
    if (atomList != NULL) {
      // storage was already set up; leave it alone
      msgErr << "BaseMolecule: Attempt to init atom storage twice." << sendmsg;
    } else {
      // record the capacity, start empty, and grab the pointer table
      maxAtoms = n;
      nAtoms = 0;
      atomList = new Atom *[maxAtoms];
    }
  }
}


// add a new atom; return it's index, or (-1) if error.
int BaseMolecule::add_atom(char *name, char *type, char *resname, char *resid,
  	char *chain, char *segname, float *pos, float *extra) {
  if(!atomList || nAtoms >= maxAtoms) {
    msgErr << "BaseMolecule: Cannot add new atom; currently " << nAtoms;
    msgErr << " atoms in structure." << sendmsg;
    return (-1);
  }

  // create atom  
  Atom *newatom = new Atom(nAtoms, pos, extra, name, type, resname, resid,
  				chain, segname);

  // add names to namelist, and put indices in Atom object
  newatom->nameindex = atomNames.add_name(name,atomNames.num());
  newatom->typeindex = atomTypes.add_name(type, atomTypes.num());
  newatom->resnameindex = resNames.add_name(resname, resNames.num());
  newatom->residindex = resIds.add_name(resid, resIds.num());
  newatom->segnameindex = segNames.add_name(segname, segNames.num());

  // use default of 'X' for chain if not given
  if(!chain || ! (*chain) || *chain == ' ')
    newatom->chainindex = chainNames.add_name("X", chainNames.num());
  else
    newatom->chainindex = chainNames.add_name(chain, chainNames.num());

  // put the atom in the list, and return index
  atomList[nAtoms] = newatom;

  MSGDEBUG(3,"BaseMolecule: Added new atom " << nAtoms << ", name=");
  MSGDEBUG(3,atomNames.name(newatom->nameindex) << sendmsg);

  return nAtoms++;
}


// Add a bond between atoms a and b, recording it on both atoms.
//   a, b : indices of atoms that have already been added
//   type : backbone classification stored with the bond on each atom
// Returns the total number of bonds added so far (including this one), or
// (-1) on error.  Errors are: no atoms yet, an index outside [0, nAtoms),
// or an attempt to bond an atom to itself.
int BaseMolecule::add_bond(int a, int b, Atom::BackboneType type) {

  // both indices must refer to existing atoms
  if(nAtoms <= 0 || a < 0 || b < 0 || a >= nAtoms || b >= nAtoms) {
    msgErr << "BaseMolecule: Atoms must be added before bonds." << sendmsg;
    return (-1);
  }

  // a self-bond is an error: report it and do not record anything
  if(a == b) {
    msgErr << "BaseMolecule: Cannot bond atom " <<a<< " to itself." << sendmsg;
    return (-1);
  }

  // put the bond in the atom list, once for each direction
  atom(a)->add_bond(b,type);
  atom(b)->add_bond(a,type);
  
  MSGDEBUG(3,"BaseMolecule: Added new bond " << nBonds << " from ");
  MSGDEBUG(3,a << " to " << b << sendmsg);

  return ++nBonds;
}


///////////////////////////  public routines

// Scaling factor required to make the molecule fit within (-1 ... 1).
// Taken from the first stored item; 1.0 when there are no items.
float BaseMolecule::scale_factor(void) { 
  if (num() > 0)
    return item(0)->scale_factor;
  return 1.0;
}
  

// Center of volume of this molecule, returned through x, y and z.
// The values are the three COV entries of the first stored item; when there
// are no items all three are set to 0.
void BaseMolecule::cov(float& x, float& y, float& z) {
  if(num() <= 0) {
    x = 0.0;
    y = 0.0;
    z = 0.0;
    return;
  }

  float *center = (item(0))->COV;
  x = center[0];
  y = center[1];
  z = center[2];
}


// Return a default radius for an atom, chosen from the first letter of its
// name (case-insensitive).
//   nm : atom name; may be NULL
// Returns the table value for H, C, N, O, F or S; 1.5 for any other first
// letter or when nm is NULL.
float BaseMolecule::default_radius(char *nm) {
  float val = 1.5;

  if(nm) {
    // toupper() is only defined for values representable as unsigned char
    // (or EOF), so convert before calling it
    switch(toupper((unsigned char) nm[0])) {
      case 'H' : val = 1.00; break;
      case 'C' : val = 1.50; break;
      case 'N' : val = 1.40; break;
      case 'O' : val = 1.30; break;
      case 'F' : val = 1.20; break;
      case 'S' : val = 1.90; break;
    }
  }

  return val;
}


// Return a default mass for an atom, chosen from the first letter of its
// name (case-insensitive).
//   nm : atom name; may be NULL
// Returns the table value for H, C, N, O, F or S; 12.0 for any other first
// letter or when nm is NULL.
float BaseMolecule::default_mass(char *nm) {
  float val = 12.0;

  if(nm) {
    // toupper() is only defined for values representable as unsigned char
    // (or EOF), so convert before calling it
    switch(toupper((unsigned char) nm[0])) {
      case 'H' : val = 1.00800; break;
      case 'C' : val = 12.01100; break;
      case 'N' : val = 14.00700; break;
      case 'O' : val = 15.99900; break;
      // NOTE(review): 55.847 is the atomic mass of iron, not fluorine
      // (about 18.998); presumably 'F' is meant to catch "FE" -- confirm
      case 'F' : val = 55.84700; break;
      case 'S' : val = 32.06000; break;
    }
  }

  return val;
}


// Return a default charge for an atom, chosen from the first letter of its
// name (case-insensitive).
//   nm : atom name; may be NULL
// Returns the table value for H, C, N, O, F or S; 2.0 for any other first
// letter or when nm is NULL.
float BaseMolecule::default_charge(char *nm) {
  float val = 2.0;

  if(nm) {
    // toupper() is only defined for values representable as unsigned char
    // (or EOF), so convert before calling it
    switch(toupper((unsigned char) nm[0])) {
      case 'H' : val = 0.0; break;
      case 'C' : val = -1.0; break;
      case 'N' : val = -0.75; break;
      case 'O' : val = -0.5; break;
      case 'F' : val = -0.25; break;
      case 'S' : val = 1.0; break;
    }
  }

  return val;
}


// create the molecule, once all the atoms and bonds have been generated.
// Runs the structure analysis in a fixed order -- backbone, residues,
// waters, residue connectivity, segments, fragments -- and reports each
// count through msgInfo.  Returns FALSE only when nAtoms is negative;
// returns TRUE otherwise, including the zero-atom case, where no analysis
// is run at all.
int BaseMolecule::create(void) {

  // make sure things are OK so far
  if(nAtoms < 0) {
    msgErr << "BaseMolecule: Cannot create molecule with < 0 atoms."<<sendmsg;
    return FALSE;
  }

  // I have to let 0 atoms in because I want to be able to read things
  // like electron density maps, which have no atoms.
  // It is kinda weird, then to make BaseMolecule be at the top of the
  // hierarchy.  Oh well.
  if(nAtoms < 1)
    return TRUE;

  // call routines to find different characteristics of the molecule
  msgInfo << "Analyzing structure ..." << sendmsg;
  msgInfo << "   Atoms: " << nAtoms << "   Bonds: " << nBonds << sendmsg;

  // find backbone bonds (this sets nBackProtein and nBackDNA)
  find_backbone();

  msgInfo << "   Backbone bonds: Protein: " << nBackProtein;
  msgInfo << "  DNA: " << nBackDNA << sendmsg;

  // find all the atoms in a resid connected to DNA/RNA/PROTEIN/WATER
  // also, assign a unique resid (uniq_resid) to each atom
  int num_residues = find_residues();
  msgInfo << "   Residues: " << num_residues << sendmsg;
  msgInfo << "   Waters: " << find_waters() << sendmsg;
  
  find_connected_residues(num_residues); // figure out which residues
                                         // are connected to which
  
  msgInfo << "   Segments: " << find_segments() << sendmsg;
  
  
  // find_fragments() fills pfragList and nfragList, so it is kept in its
  // own statement ahead of the two statements that print their sizes
  msgInfo << "   Fragments: " << find_fragments();
  msgInfo << "   Protein: " << pfragList.num();
  msgInfo << "   Nucleic: " << nfragList.num() << sendmsg;
  return TRUE;
}


///// ********* functions to find the backbone, etc

// Classify backbone atoms and backbone bonds.
//  1. Every atom and every bond is reset to Atom::NORMAL; atoms named
//     CA, C, O or N are marked PROTEINBACK.
//  2. Terminal oxygens (OT1, OT2) bonded to a PROTEINBACK atom are marked
//     PROTEINBACK as well.
//  3. Bonds joining two marked atoms are marked PROTEINBACK and counted.
//  4. Atoms still NORMAL whose name is one of the DNA/RNA backbone names are
//     marked NUCLEICBACK, and the remaining bonds joining two marked atoms
//     are marked NUCLEICBACK and counted.
// nBackProtein and nBackDNA are recomputed by this routine; the return value
// is their sum.
int BaseMolecule::find_backbone(void)
{
  int i, j;
  
  nBackProtein = nBackDNA = 0;

  // name codes of the protein backbone atoms
  int protypes[4];
  protypes[0] = atomNames.typecode("CA");
  protypes[1] = atomNames.typecode("C");
  protypes[2] = atomNames.typecode("O");
  protypes[3] = atomNames.typecode("N");

  for (i=0; i<nAtoms; i++) {
    Atom *a = atomList[i];
    a -> atomType = Atom::NORMAL;	// initialize all atoms
    for (j=0; j < a -> bonds; j++)	// initialize all bonds
      a -> bondType[j] = Atom::NORMAL;

    for (j=0; j < 4; j++) {		// check if in protein backbone
      if (a -> nameindex == protypes[j]) {
        a -> atomType = Atom::PROTEINBACK;
        break;
      }
    }
  }

  // special case for terminal oxygens that miss the search for O:
  // accept an OT1 or OT2 if it is bonded to any protein backbone atom
  int termtypes[2];
  termtypes[0] = atomNames.typecode("OT1");
  termtypes[1] = atomNames.typecode("OT2");
  for (i=nAtoms-1; i>=0; i--) {
    Atom *a = atomList[i];
    if (a -> nameindex != termtypes[0] && a -> nameindex != termtypes[1])
      continue;
    for (j=0; j < a->bonds; j++) {
      if (atomList[a->bondTo[j]] -> atomType == Atom::PROTEINBACK) {
        a -> atomType = Atom::PROTEINBACK;
        break;				// one backbone neighbor is enough
      }
    }
  }
  
  // now find the bonds between protein backbone atoms.
  // Only PROTEINBACK has been assigned so far, so a non-zero atomType means
  // "protein backbone" here (this relies on Atom::NORMAL testing as zero).
  for (i=0; i<nAtoms; i++) {
    Atom *a = atomList[i];
    if (a -> atomType) {  // found a backbone atom
      for (j=0; j < a->bonds; j++) {
        int nb = a->bondTo[j];
        Atom *b = atomList[ nb ];
        if (b -> atomType) {
          a -> bondType[j] = Atom::PROTEINBACK;
          if (nb > i)  // only count bonds once
            nBackProtein ++;
        }
      }
    }
  }

  // search for the DNA/RNA backbone;  the atom names are:
  // for the phosphate:  P, O1P, O2P
  // for the rest: O3', C3', C4', C5', O5'
  // (or O3*, C3*, C4*, C5*, O5*)
  //
  // There are exactly 13 names.  Every slot of the table is filled so the
  // search below never compares against an unset entry.
  const int numNucTypes = 13;
  int nuctypes[numNucTypes];
  nuctypes[0] = atomNames.typecode("P");
  nuctypes[1] = atomNames.typecode("O1P");
  nuctypes[2] = atomNames.typecode("O2P");
  nuctypes[3] = atomNames.typecode("O3'");
  nuctypes[4] = atomNames.typecode("C3'");
  nuctypes[5] = atomNames.typecode("C4'");
  nuctypes[6] = atomNames.typecode("C5'");
  nuctypes[7] = atomNames.typecode("O5'");
  nuctypes[8] = atomNames.typecode("O3*");
  nuctypes[9] = atomNames.typecode("C3*");
  nuctypes[10] = atomNames.typecode("C4*");
  nuctypes[11] = atomNames.typecode("C5*");
  nuctypes[12] = atomNames.typecode("O5*");
  
  for (i=0; i<nAtoms; i++) {
    if(! (atomList[i] -> atomType) ) {	// not already protein backbone
      for (j=0; j < numNucTypes; j++) {	// check if in dna backbone
        if (atomList[i] -> nameindex == nuctypes[j]) {
          atomList[i] -> atomType = Atom::NUCLEICBACK;
          break;
        }
      }
    }
  }

  // now find the bonds between nucleic acid backbone atoms: any bond that
  // joins two marked atoms and was not already taken as a protein backbone
  // bond in the earlier pass
  for (i=0; i<nAtoms; i++) {
    Atom *a = atomList[i];
    if (a -> atomType) {  // found a backbone atom
      for (j=0; j < a->bonds; j++) {
        int nb = a->bondTo[j];
        Atom *b = atomList[ nb ];
        if (b -> atomType && a -> bondType[j] != Atom::PROTEINBACK) {
          a -> bondType[j] = Atom::NUCLEICBACK;
          if (nb > i)  // only count bonds once
            nBackDNA ++;
        }
      }
    }
  }

  return nBackProtein + nBackDNA; 
}

// find water molecules based on the residue name
// from the documentation for molscript, these are possible
// waters:
// type H2O HH0 OHH HOH OH2 SOL WAT
// as well, I add TIP, TIP2, TIP3, and TIP4
// The count is the number of sets of connected Atom::WATERS
int BaseMolecule::find_waters(void)
{
  int watertypes[11];
  int i,j;
  Atom *a;
  watertypes[0] = resNames.typecode("H2O");
  watertypes[1] = resNames.typecode("HH0");
  watertypes[2] = resNames.typecode("OHH");
  watertypes[3] = resNames.typecode("HOH");
  watertypes[4] = resNames.typecode("OH2");
  watertypes[5] = resNames.typecode("SOL");
  watertypes[6] = resNames.typecode("WAT");
  watertypes[7] = resNames.typecode("TIP");
  watertypes[8] = resNames.typecode("TIP2");
  watertypes[9] = resNames.typecode("TIP3");
  watertypes[10] = resNames.typecode("TIP4");

  for (i=0; i<nAtoms; i++) {
    a = atomList[i];
    if (a->residueType == Atom::NOTHING) {  // make sure it isn't named yet
      for (j=0; j<11; j++) {
        if (watertypes[j] == a -> resnameindex) {
          a->residueType = Atom::WATERS;
          break;
        } //if 
      } // for
    } // if
  } // for
  
  // now do the count
  char *tmp = new char[nAtoms];
  for (i=nAtoms-1; i>=0; i--)
    tmp[i] = FALSE;
  int count = 0;
  for (i=0; i<nAtoms; i++) {
    a = atomList[i];
    if (a->residueType == Atom::WATERS && !tmp[i]) {
      count ++;
      find_connected_waters(i, a->residindex, tmp);
    }
  }
  delete [] tmp;
  return count;   
}

// if this is a Atom::WATERS with index idx, mark it and find if
// any of its neighbors are Atom::WATERS
void BaseMolecule::find_connected_waters(int i, int idx, char *tmp)
{
  Atom *a = atomList[i];
  int j;
  if (a->residueType == Atom::WATERS && !tmp[i]) {
    tmp[i] = TRUE;
    for (j=0; j<a->bonds; j++) {
      find_connected_waters(a -> bondTo[j], idx, tmp);
    }
  }
}

// Count the backbone atoms of the given kind that are bonded together and
// share the given residue id, starting from atomidx.  RECURSIVE.
// Each atom counted has its flgs entry set to tmpid, which both stops the
// recursion from looping and lets clean_up_connection() undo the marks.
// An atom is skipped (contributing 0) if its flag is already non-zero, if
// its atomType is not `backbone`, or if its residstr does not convert to
// residueid.
int BaseMolecule::find_connected_backbone(Atom::BackboneType backbone,
   int atomidx, int residueid, int tmpid, int *flgs)
{
  // visited already, by this search or an earlier one
  if (flgs[atomidx] != 0)
    return 0;

  Atom *cur = atomList[atomidx];

  // wrong kind of atom, or it belongs to a different residue number
  if (cur->atomType != backbone || atoi(cur->residstr) != residueid)
    return 0;

  // this atom qualifies: mark it before descending into the neighbors
  flgs[atomidx] = tmpid;
  int total = 1;                // counts this atom
  int b;
  for (b = 0; b < cur->bonds; b++) {
    total += find_connected_backbone(backbone, cur->bondTo[b],
                 residueid, tmpid, flgs);
  }
  return total;
}

// the find_connected_backbone left terms of flgs which need to be
// cleaned up
void BaseMolecule::clean_up_connection(int i, int tmpid, int *flgs)
{
  if (flgs[i] != tmpid) {
    return;
  }
  int j;
  Atom *a = atomList[i];
  flgs[i] = 0;
  for (j=a->bonds - 1; j>=0; j--) {
    clean_up_connection(a->bondTo[j], tmpid, flgs);
  }
}

// The residue containing atom i has been accepted: starting at i, give every
// bonded atom whose residstr converts to residueid the residue type restype
// and set its flag to tmpid.  Atoms whose flag is already non-zero, or whose
// residue number differs, stop the walk.
void BaseMolecule::find_connected_atoms_in_resid(
    Atom::ResidueType restype, int i, int residueid, int tmpid, int *flgs)
{
  // visited already, by this search or an earlier one
  if (flgs[i] != 0)
    return;

  Atom *cur = atomList[i];
  if (atoi(cur->residstr) == residueid) {
    flgs[i] = tmpid;
    cur->residueType = restype;
    int b = cur->bonds;
    while (--b >= 0) {
      find_connected_atoms_in_resid(restype, cur->bondTo[b],
          residueid, tmpid, flgs);
    }
  }
}

// Find connected backbone atoms with the same resid
// if found, find all the atoms with the same resid
// which are connected to those backbone atoms only through
// atoms of the same resid
void BaseMolecule::find_and_mark(int n, Atom::BackboneType backbone,
  Atom::ResidueType restype, int *tmpid, int *flgs)
{
  int i;
  Atom *a;
  int residueid; // the real resid

  for (i=0; i<nAtoms; i++) {
    a = atomList[i];   // look for a new backbone atom
    if ( a -> atomType == backbone && flgs[i] == 0) {
      residueid = atoi( a-> residstr);
      if (find_connected_backbone(backbone, i, residueid, *tmpid, 
           flgs) >= n) {
        // if find was successful, start all over again
        clean_up_connection(i, *tmpid, flgs);
        // but mark all the Atoms connected to here
        find_connected_atoms_in_resid(restype, i, residueid, *tmpid, flgs);
        // and one more was made
        (*tmpid) ++;
      } else {
        // clean things up so I won't have problems later
        clean_up_connection(i, *tmpid, flgs);
      }
    }
  }
}

// find connected atoms to i with the same residindex and label
// it with the uniq_resid -- recursive algor.
void BaseMolecule::make_uniq_resid(int i, int residindex,
     int newresid, int *flgs)
{
  if (flgs[i])
    return;
  Atom *a = atomList[i];
  if (a -> residindex != residindex)
    return;
    
  a -> uniq_resid = newresid;  // give it a number
  flgs[i] = TRUE;              // and make me has having been done
  for (int j= a -> bonds - 1; j>=0; j--) {
    make_uniq_resid(a->bondTo[j], residindex, newresid, flgs);
  }
}

// Assign a unique residue number (uniq_resid) to every atom and classify
// protein and nucleic acid residues.  Returns the number of unique residues
// found.
int BaseMolecule::find_residues(void)
{
  // one scratch flag per atom, shared by the connected-atom searches below
  int *marks = new int[nAtoms];
  int k;
  for (k = 0; k < nAtoms; k++)
    marks[k] = 0;

  // assign a uniq resid (uniq_resid) to each set of connected atoms
  // with the same residue id.  There could be many residues with the
  // same id, but not connected (the SSN problem - SSNs are not unique
  // so don't use them as the primary key)
  int total = 0;
  for (k = 0; k < nAtoms; k++) {
    if (marks[k])
      continue;                         // already numbered
    make_uniq_resid(k, atomList[k]->residindex, total, marks);
    total++;
  }

  // start over with clean flags for the backbone searches
  for (k = 0; k < nAtoms; k++)
    marks[k] = 0;

  // running id handed to find_and_mark; it starts at 1 because a flag
  // value of 0 means "untouched"
  int back_res_count = 1;

  // hunt for the proteins:
  // there must be 4 PROTEINBACK atoms connected and with the same resid,
  // then all connected atoms will be marked as PROTEIN atoms.
  // This gets everything except the terminals.
  find_and_mark(4, Atom::PROTEINBACK, Atom::PROTEIN, &back_res_count, marks);

  // do the same for nucleic acids, which need 8 connected NUCLEICBACK atoms.
  // Requiring only 5 would skip the phosphate (P and 2 O's), but that
  // messes up nfragList, since it needs a P to detect an end.
  find_and_mark(8, Atom::NUCLEICBACK, Atom::NUCLEIC, &back_res_count, marks);

  delete [] marks;
  return total;
}

// Look through the atoms of residue number `residue` for one whose name
// code equals nametype.  Returns the index of the first such atom in the
// residue's atom list, or -3 if the residue has none.
int BaseMolecule::find_atom_in_residue(int nametype, int residue)
{
  int count = residueList[residue]->atoms.num();
  int k = 0;
  while (k < count) {
    int atomidx = residueList[residue]->atoms[k];
    if (atomList[atomidx]->nameindex == nametype)
      return atomidx;
    k++;
  }
  return -3;
}
// Same search by atom name: the name is first converted to its code in
// atomNames.  Returns -2 if that code is negative; otherwise the result of
// the name-code version.
int BaseMolecule::find_atom_in_residue(char *name, int residue)
{
  int code = atomNames.typecode(name);
  return (code < 0) ? -2 : find_atom_in_residue(code, residue);
}

// Build the Residue objects and work out which residues are bonded to
// which.  Every atom must already carry its uniq_resid.
//   num_residues : number of unique residues, as returned by find_residues()
// Steps: create one Residue per uniq_resid and register its atoms; record a
// residue-level bond, in both directions, for every atom-level bond; then
// classify each residue-level bond as PRO_PRO, NUC_NUC or OTHER.
void BaseMolecule::find_connected_residues(int num_residues)
{
  int i, j;
  for (i=0; i<num_residues; i++) { // init the list to NULLs
    residueList[i] = NULL;
  }
  
  for (i=nAtoms-1; i>=0; i--) { // go through all the atoms
    j = atomList[i] -> uniq_resid;
    if (residueList[j] == NULL) { // then init the residue
      residueList[j] = new Residue( j, atoi(atomList[i] -> residstr), 
                                     atomList[i] -> residueType);
    }
    // Tell the residue that this atom is in it
    residueList[j] -> add_atom(i);
  }

  // double check that everything was created
  for (i=0; i<num_residues; i++) {
    if (residueList[i] == NULL) { // no atom was found for this residue
      msgErr << "Mysterious residue creation in ";
      msgErr << "BaseMolecule::find_connected_residues." << sendmsg;
      residueList[i] = new Residue(i, -1, Atom::NOTHING);
    }
  }
  
  // now go through the atoms and find out which ones are connected
  // to which; with that info, tell the corresponding residues which
  // are connected to which
  Atom *a;
  for (i=0; i<nAtoms; i++) {
    a = atomList[i];
    for (j=0; j<a -> bonds; j++) {
      if (a -> bondTo[j] > i){ // faster by only consid. larger atomnums
        residueList[ a -> uniq_resid ] -> add_bond(
                    atomList[ a -> bondTo[j]] -> uniq_resid);
        residueList[atomList[ a -> bondTo[j]] -> uniq_resid] 
         -> add_bond(a -> uniq_resid);
      }
    }
  }
  
  // finally, tell the residue the connection type
  Atom::ResidueType bondfrom, bondto;
  for (i=0; i<num_residues; i++) {
    bondfrom = residueList[i] -> residueType;
    for (j=0; j<residueList[i]->bonds; j++) {
      bondto = residueList[residueList[i] -> bondTo[j]] -> residueType;
      if (bondfrom == bondto) {
        if (bondfrom == Atom::PROTEIN) {
          residueList[i] -> bondType[j] = Residue::PRO_PRO;
        } else if (bondfrom == Atom::NUCLEIC) {
          residueList[i] -> bondType[j] = Residue::NUC_NUC;
        } else {
          residueList[i] -> bondType[j] = Residue::OTHER;
        }
      } else {
        // residues of different types: the bond is OTHER as seen from
        // either residue, so always record it ...
        residueList[i] -> bondType[j] = Residue::OTHER;
        // ... but each pair is visited from both sides, so only warn
        // when looking from the lower-numbered residue
        if (i < residueList[i] -> bondTo[j] ) {
          msgWarn << "Unusual bond between residues ";
          msgWarn << residueList[i] -> resid << " and ";
          msgWarn << residueList[residueList[i] -> bondTo[j]] -> resid;
          msgWarn << sendmsg;
        }
      }
    }
  }
}

// Starting at residue resnum, add every residue reachable through
// residue-level bonds to fragment number fragnum.  Each residue visited is
// flagged in flgs, appended to the fragment, and told its fragment number.
void BaseMolecule::find_connected_fragment(int resnum, int fragnum, int *flgs)
{
  if (!flgs[resnum]) {                          // not visited yet
    flgs[resnum] = TRUE;
    fragList[fragnum]->append(resnum);
    residueList[resnum]->fragment = fragnum;    // residue knows its fragment
    int b = residueList[resnum]->bonds;
    while (--b >= 0)
      find_connected_fragment(residueList[resnum]->bondTo[b], fragnum, flgs);
  }
}

// Group the residues into fragments (sets of residues connected through
// residue-level bonds), tell every atom which fragment it is in, and then
// build the protein and nucleic acid subfragment lists.
// Returns the number of fragments found.
int BaseMolecule::find_fragments(void) {
  // loop indices live at function scope: under ISO C++ a variable declared
  // in a for-init-statement is not visible after that loop ends
  int i, j, k;
  int count = 0;
  int *flgs = new int[residueList.num()]; // set up temp space
  for (i=residueList.num()-1; i>=0; i--)
    flgs[i] = 0;

  for (i=0; i<residueList.num(); i++) { // find unmarked fragment
    if (!flgs[i]) {
      fragList.append(new Fragment);
      find_connected_fragment(i, count, flgs);  // find and mark its neighbors
      count ++;
    }
  }

  // and tell the atoms which fragment they are in
  for (i=residueList.num()-1; i>=0; i--) {   // get the residues
    k = residueList[i] -> fragment;
    for (j=residueList[i]->atoms.num()-1; j>=0; j--) {  // and their atoms
      atomList[residueList[i] -> atoms[j]] -> fragment = k;
    }
  }
  
  delete [] flgs;
  
  // find the protein and nucleic acid subfragments: protein chains start at
  // an N and continue from a C (no alternate end name, hence -1); nucleic
  // chains start at a P and continue from an O3' or O3*
  find_subfragments( atomNames.typecode("N"), atomNames.typecode("C"), -1,
     Atom::PROTEIN, &pfragList);
  find_subfragments( atomNames.typecode("P"), atomNames.typecode("O3'"),
     atomNames.typecode("O3*"), Atom::NUCLEIC, &nfragList);

  return count;
}
      

// this adds the current Atom::ResidueType to the *subfragList,
// this finds the residue connected to the endatom atom type
// and calls this function recursively on that residue
// this will NOT work across NORMAL bonds
void BaseMolecule::find_connected_subfragment(int resnum, int fragnum, 
         int *flgs, int endatom,  int altendatom, Atom::ResidueType restype, 
         ResizeArray<Fragment *> *subfragList)
{
  if (flgs[resnum] || residueList[resnum] -> residueType != restype) 
      return;  // been here before, or this is no good
  (*subfragList)[fragnum] -> append(resnum);  // add to the list
  flgs[resnum] = TRUE;                        // and prevent repeats

//  msgInfo << resnum << " is in one" << sendmsg;
  // find the atom in this residue with the right type
  int i, j, nextres;
  Atom *a;
  for (i=residueList[resnum] -> atoms.num() - 1; i>=0; i--) {
    a = atomList[residueList[resnum] -> atoms[i]];
//    msgInfo << "checking atom " << i << sendmsg;
    if (a -> nameindex == endatom ||
        a -> nameindex == altendatom ) {   // found the end atom
//      msgInfo << "It is the end" << sendmsg;
      for (j=a->bonds-1; j>=0; j--) {  // look at the bonds
// I can't look at if the residue "bond" is a PRO-PRO or NUC-NUC, since
// that won't tell me if the atom involved is the endatom atom
// This is important because I need to avoid things like S-S bonds
// (note that I never checked that the end was bonded to a start on
//  the next residue! - c'est la vie, or something like that
        if (a->bondType[j] != Atom::NORMAL &&                   // not backbone
            (nextres = atomList[a->bondTo[j]] -> uniq_resid) != resnum &&
            !flgs[nextres] ) { // found next residue, and unvisited
//          msgInfo << "Trying it" << sendmsg;
          find_connected_subfragment(nextres, fragnum, flgs, endatom,
              altendatom, restype, subfragList);
          return; // only find one; assume no branching
        }
      } // end of for
    } // end of found correct endtype
  } // searching atoms
//  msgInfo << "done with residue " << resnum << sendmsg;
} // end of finding connected subfragment

// find a class of fragments
void BaseMolecule::find_subfragments(int startatom, int endatom, int altendatom,
    Atom::ResidueType restype, ResizeArray<Fragment *> *subfragList)
{
  int *flgs = new int[residueList.num()];
  int i, j, k;
  Atom *a;
  
  for (i=residueList.num()-1; i>=0; i--) {
    flgs[i] = 0;
  }
  // find an unvisited protein residue which has an startatom with no off
  // residue bond to the same restype
  for (i=residueList.num()-1; i>=0; i--) {
    if (!flgs[i] &&
        residueList[i] -> residueType == restype) { // does this residue
      for (j=residueList[i]->atoms.num()-1; j>=0; j--) { // have an startatom
        if ((a=atomList[residueList[i]->atoms[j]]) -> nameindex == startatom){
//          msgInfo << "Found start atom for " << restype << sendmsg;
          for (k=a->bonds-1; k>=0; k--) {
//            msgInfo << "uniq_resid  " << atomList[a->bondTo[k]] -> uniq_resid << "  ";
//            msgInfo << "(expecting " << i << "  ";
//            msgInfo << "restype  " <<  atomList[a->bondTo[k]] -> residueType << sendmsg;
            if (atomList[a->bondTo[k]] -> uniq_resid != i && // off residue
                atomList[a->bondTo[k]] -> residueType == restype) {
                           // to some other type of residue?
                 break; // then stop, so that k>=0
             }
            }
          if (k<0) { // if yes, find stuff downchain
            subfragList->append(new Fragment);
            find_connected_subfragment(i, subfragList->num()-1, flgs, 
                  endatom, altendatom, restype, subfragList );
          } // found starting residue
        } // found startatom
      } // going through atoms
    } // found restype
  } // going through residues
  delete [] flgs;
} // found 'em all


// static storage for the atom_full_name and atom_short_name routines.
// Both write into this one buffer, so a pointer returned by either is
// overwritten by the next call to either.
static char atom_full_name_buf[256];

// Return a string identifying atom a as "<molecule id>/<atom index>".
//   a      : atom index; must be in [0, nAtoms)
//   newstr : if not NULL, the string is also copied into this storage,
//            which the caller must have made large enough
// Returns a pointer to the shared static buffer, or NULL if a is out of
// range.
char *BaseMolecule::atom_full_name(int a, char *newstr) {

  if(a < 0 || a >= nAtoms)
    return NULL;
    
  // an earlier format, kept for reference:
//  sprintf(atom_full_name_buf, "%d:%s-%s:%s", ID, nameAtom->segnamestr,
//		nameAtom->residstr, nameAtom->namestr);

  sprintf(atom_full_name_buf, "%-d/%-d", id(), a);

  if(newstr)
    strcpy(newstr, atom_full_name_buf);
    
  return atom_full_name_buf;
}


// return a string containing a shorter name specification for the
// specified atom.  If the second argument is not NULL, the string is
// also copied into the given string storage
char *BaseMolecule::atom_short_name(int a, char *newstr) {

  if(a < 0 || a >= nAtoms)
      return NULL;

  Atom *nameAtom = atom(a);

  sprintf(atom_full_name_buf, "%s%s:%s", nameAtom->resnamestr,
		nameAtom->residstr, nameAtom->namestr);

  if(newstr)
    strcpy(newstr, atom_full_name_buf);

  return atom_full_name_buf;
}


