/***************************************************************************
 *cr                                                                       
 *cr            (C) Copyright 1995 The Board of Trustees of the           
 *cr                        University of Illinois                       
 *cr                         All Rights Reserved                        
 *cr                                                                   
 ***************************************************************************/

/***************************************************************************
 * RCS INFORMATION:
 *
 *      $RCSfile: AtomSel.C,v $
 *      $Author: billh $        $Locker:  $                $State: Exp $
 *      $Revision: 1.13 $      $Date: 1995/05/11 21:36:17 $
 *
 ***************************************************************************
 * DESCRIPTION:
 * 
 * Parse and maintain the data for selecting atoms.
 *
 ***************************************************************************/
// RCS version-identification string for this file.  It is only compiled
// when ARCH_HPUX9 is defined.
#ifdef ARCH_HPUX9
  static char ident[] = "@(#)$Header: /tmp_mnt/Home/h2/billh/projects/vmd/src/RCS/AtomSel.C,v 1.13 1995/05/11 21:36:17 billh Exp $";
#endif

#include <string.h>
#include <stdlib.h>
#include <unistd.h> // for regex
#include <ctype.h>  // for isspace, etc.
#include "AtomSel.h"
#include "BaseMolecule.h"
#include "MoleculeList.h"
#include "Inform.h"
#include "utilities.h"
#include "config.h" // for CMDLEN

#include "Tokenize.h"


// PARSE_DEBUG(x) wraps the debugging statements used throughout this file.
// As defined here it expands to nothing, so the wrapped statements are
// compiled out.
#define PARSE_DEBUG(x)
// To turn on a lot of debugging output, replace the definition above with
// the following one:
//#define PARSE_DEBUG(x) x

// A string array with text descriptions of the selection methods.  It has
// AtomSel::TOTAL entries and is indexed with the selection-method constants
// used in this file (e.g. AtomSelName[ALL], AtomSelName[RESNAME]); the parser
// compares user input against these names without regard to case.
// NOTE(review): the order of the strings presumably has to match the order
// of the corresponding enumerators declared elsewhere -- confirm before
// adding or reordering entries.
char *AtomSelName[AtomSel::TOTAL] = { "None", "All", "Backbone", "Name",
	"Type", "ResName", "ResType", "ResID", "SegName", "ID" ,
	"Protein", "Nucleic", "Waters", "Fragment", "PFrag", "NFrag"};


//////////////////////////  constructor and destructor
// Constructor.  Remembers the molecule list, starts with no molecule, no
// selection results and no parsed command, and then installs the default
// selection by handing AtomSelName[DEFAULT_ATOMSEL] to change().
AtomSel::AtomSel(MoleculeList *mlist) {

  MSGDEBUG(2,"Creating new AtomSel object ..." << sendmsg);

  // nothing parsed and nothing cached yet
  parse_tree = NULL;
  cmdStr = NULL;
  on = NULL;
  selected = 0;

  // no molecule has been searched so far
  mol = NULL;
  molList = mlist;

  // parse the default selection; change() fills in cmdStr and parse_tree
  change(AtomSelName[DEFAULT_ATOMSEL]);
}


// Copy constructor.  Copies the selection text of 'as' and re-parses it to
// build a private parse tree.  The new object is NOT attached to a molecule
// or a molecule list (mol and molList are NULL), so no atoms are selected
// until find() is called with a molecule.
// NOTE(review): the original code also left mol/molList NULL; confirm that
// callers do not expect the molecule to be copied as well.
AtomSel::AtomSel(AtomSel& as) {
  PARSE_DEBUG(msgInfo << "Doing the copy thang" << sendmsg;)
  cmdStr = NULL;
  parse_tree = NULL;
  on = NULL;
  selected = 0;
  mol = NULL;
  molList = NULL;

  // Parse straight from the source object's string.  Passing our own cmdStr
  // to change() is not safe: change() frees cmdStr before duplicating the
  // string it was given, which would read freed memory.
  if (as.cmdStr) {
    change(as.cmdStr);       // makes the parse tree and sets cmdStr
    if (!cmdStr)             // parse refused: still keep a copy of the text
      cmdStr = stringdup(as.cmdStr);
  }
  // mol is NULL here, so there is nothing to search yet (find(NULL) would
  // return 0 without doing anything).

  // actually, can I just copy that info?
  //on = new char[as.mol->nAtoms];
  //memcpy(on, as.on, as.mol->nAtoms * sizeof(char));
  //selected = as.selected;

  PARSE_DEBUG(msgInfo << "It is done. " << sendmsg);
}


// Destructor; frees the per-atom flag array, the parse tree together with
// the text buffer owned by its root node, and the stored command string.
AtomSel::~AtomSel(void) {
  if(on)
    delete [] on;
  if (parse_tree) {
    // The root's text buffer comes from stringdup(), like cmdStr, so it is
    // released with the array form of delete as well.
    delete [] parse_tree -> data -> data;
    delete parse_tree;
  }
  if (cmdStr)
    delete [] cmdStr;
}

// Assignment operator, to change the current settings to those of 'as'.
// Does NOT change the current molecule: the selection text of 'as' is
// re-parsed and, if this object already has a molecule, the atoms are
// searched again in that molecule.
// Returns a reference to this object.
AtomSel& AtomSel::operator=(const AtomSel &as) {
  PARSE_DEBUG(msgInfo << "Doing the = operator" << sendmsg;)

  // Self-assignment must be a no-op; otherwise cmdStr would be freed and
  // then read again through 'as'.
  if (this == &as)
    return *this;

  // throw away everything derived from the old selection text
  if (parse_tree) {
      delete [] parse_tree -> data -> data;  // stringdup()'d array
      delete parse_tree;
      parse_tree = NULL;
  }
  if (on) {
    delete [] on;
    on = NULL;
  }
  if (cmdStr) {
    delete [] cmdStr;
    cmdStr = NULL;
  }

  // Parse directly from the source string.  change() stores its own copy in
  // cmdStr and calls find(mol) itself when a molecule is set, so no second
  // find() is needed here.
  if (as.cmdStr) {
    change(as.cmdStr);
    if (!cmdStr)             // parse refused: still keep a copy of the text
      cmdStr = stringdup(as.cmdStr);
  }

  // actually, can I just copy that info?
  //on = new char[as.mol->nAtoms];
  //memcpy(on, as.on, as.mol->nAtoms * sizeof(char));
  //selected = as.selected;
  PARSE_DEBUG(msgInfo << "Finished with the = operator" << sendmsg;)
  return *this;
}


  // provide new settings; does a 'find' at the end if a mol has
  // been previously provided.
int AtomSel::change(char *newcmd) {
  // Parse 'newcmd' into a fresh tree.  Only if parsing succeeds are the old
  // tree and command string replaced; on failure the object is unchanged.
  //
  // Returns FALSE if the string is missing, has unbalanced parentheses or
  // cannot be parsed.  On success the result is the parse result when no
  // molecule is set, otherwise the value returned by find(mol), i.e. the
  // number of atoms selected.
  // NOTE(review): with a molecule set, a valid selection that matches no
  // atoms therefore returns 0 -- confirm callers do not treat that as a
  // parse error.
  if (!newcmd) {
    msgErr << "No atom selection string given" << sendmsg;
    return FALSE;
  }

  // do the parens check myself to save time later on: the count may never
  // go negative and must be back at zero at the end of the string
  int parens = 0;
  for (char *s=newcmd; *s && parens >= 0; s++) { // go through the string
    if (*s == '(') parens++;
    if (*s == ')') parens--;
  }
  if (parens != 0) {
    msgErr << "Unbalanced parentheses in atom selection string";
    msgErr << sendmsg;
    return FALSE;
  }

  PARSE_DEBUG(msgInfo << "Starting the parse" << sendmsg;)
  //  now do the parsing of the data; the parser works on (and cuts up) a
  //  private copy of the string owned by the root node
  Tree<ParseType> *newtree = new Tree<ParseType>;  // set up the root node
  newtree -> data -> data = stringdup(newcmd);
  newtree -> data -> op = DATA;
  PARSE_DEBUG(msgInfo << "Doing it"<< sendmsg;)

  int retval = parse_cmd(newtree);  // parse the tree

  if(retval) {  // if it is good, keep it
    // Duplicate the text BEFORE freeing cmdStr: the caller may have passed
    // in our own cmdStr.
    char *newStr = stringdup(newcmd);

    PARSE_DEBUG(msgInfo << "Delete old" << sendmsg;)
    if (parse_tree) {
      delete [] parse_tree -> data -> data;  // stringdup()'d array
      delete parse_tree;
    }
    if (!cmdStr) {
      PARSE_DEBUG(msgErr << "Bogus command string" << sendmsg;)
    } else {
      delete [] cmdStr;
    }
    cmdStr = newStr;
    parse_tree = newtree;
    if (mol) {
      PARSE_DEBUG(msgInfo << "Find in current mol" << sendmsg;)
      retval = find(mol);
      PARSE_DEBUG(msgInfo << "Found" << sendmsg;)
    }
  } else {             // else throw it away
    PARSE_DEBUG(msgInfo << "Bad find" << sendmsg;)
    delete [] newtree -> data -> data;
    delete newtree;
    PARSE_DEBUG(msgInfo << "All gone." << sendmsg;)
  }
  PARSE_DEBUG(msgInfo << "change is done" << sendmsg;)
  return retval;
}

//////////////////////////  private routines

// Is this string a:  [non-alphanumeric]and[non-alphanumeric] ?
// 's' points at the character in FRONT of the possible keyword and must not
// be at the end of the string.  Returns 1 for a match and 0 otherwise.
// The end of the string counts as a non-alphanumeric character after the
// keyword.
static int is_and(char *s) {
  // check character before the "and"; the cast keeps characters with the
  // high bit set from being passed to isalnum() as negative values
  if (isalnum((unsigned char) s[0])) {
    return 0;
  }
  // strncmp stops at a NUL, so this never reads past the end of the string
  if (strncmp(s+1, "and", 3) != 0) {
    return 0;
  }
  // all three letters matched, so s[4] is at worst the terminating NUL
  return !isalnum((unsigned char) s[4]);
}
// Is this string a:  [non-alphanumeric]or[non-alphanumeric] ?
// 's' points at the character in FRONT of the possible keyword and must not
// be at the end of the string.  Returns 1 for a match and 0 otherwise.
// The end of the string counts as a non-alphanumeric character after the
// keyword.
static int is_or(char *s) {
  // check character before the "or"; the cast keeps characters with the
  // high bit set from being passed to isalnum() as negative values
  if (isalnum((unsigned char) s[0])) {
    return 0;
  }
  // strncmp stops at a NUL, so this never reads past the end of the string
  if (strncmp(s+1, "or", 2) != 0) {
    return 0;
  }
  // both letters matched, so s[3] is at worst the terminating NUL
  return !isalnum((unsigned char) s[3]);
}

///////////////////////////////////////////////////////
// Case-insensitive comparison of the first "word" of s1 against s2.
// Characters are compared pairwise until n characters have been compared,
// s1 reaches a whitespace character, or either string ends.
// Returns 0 for a match and 1 otherwise:
//   - a mismatching pair of characters gives 1;
//   - if all n characters matched the result is 0, whatever follows;
//   - if a string ended, the result is 0 when both ended, or when the
//     character the other string stopped on is whitespace (so "name CA"
//     matches "Name"), and 1 otherwise;
//   - if s1 stopped on whitespace while s2 did not end, the result is 0
//     only when s2 has that very same character at this position.
static int space_strncasecmp(char *s1, char *s2, int n)
{
  // the casts keep characters with the high bit set from reaching the
  // <ctype.h> functions as negative values
  while (*s1 && *s2 && !isspace((unsigned char) *s1) && n > 0) {
    if (toupper((unsigned char) *s1) != toupper((unsigned char) *s2))
      return 1;
    s1++;
    s2++;
    n--;
  }
  if (n==0)    // look no farther, all n characters matched
    return 0;
  if (!*s1 || !*s2) {  // did either reach the end?
    if (*s1 == *s2 ||                      // both did
        isspace((unsigned char) *s1) ||    // s2 ended at a word break in s1
        isspace((unsigned char) *s2))      // s1 ended at a word break in s2
      return 0;
    return 1;          // only one ended, in the middle of a word
  }
  // *s1 is whitespace and s2 has not ended: is *s2 the same character?
  return (*s1 != *s2) ? 1 : 0;
}

//////////////////////// parse_cmd //////////////////////////
// Parse the text held by a DATA node and turn that node into the matching
// piece of the parse tree.  Returns TRUE on success; on any error a message
// is sent to msgErr and 0/FALSE is returned.
//
// The text is examined from least to most precedence:
//   1. "or"  outside of parentheses  -> OR node with two DATA children
//   2. "and" outside of parentheses  -> AND node with two DATA children
//   3. a leading "not"               -> NOT node with one DATA child
//   4. a leading "( ... )"           -> PARENS node with one DATA child
//   5. "around|within <dist> of <selection>" -> AROUND node; the node's
//      text becomes the distance
//   6. "same <name> as <selection>"  -> SAME node; the node's text becomes
//      the name
//   7. otherwise a single keyword, followed by parameters where the keyword
//      takes them; the node stays a DATA node, 'compare' records the
//      keyword and the node's text becomes the parameter list.
// Children are parsed recursively.
//
// The string is cut up IN PLACE: NUL characters are written over the
// operators, and the children's text pointers point into the same buffer
// as the parent's.  Nothing is allocated for the text here.
int AtomSel::parse_cmd(Tree<ParseType> *node) {
  if (node -> data -> op != DATA) {
    msgErr << "Trying to parse non-data node info in AtomSel::parse_cmd";
    msgErr << sendmsg;
    return 0;
  }
  
  char *s;
  PARSE_DEBUG(msgInfo << "analyzing " << node -> data -> data << sendmsg;)
  int parens;
  
  // go in order of least to most precedence
  // now find ORs outside of parens
  PARSE_DEBUG(msgInfo << "Looking for an 'or'" << sendmsg;)
  parens = 0;
  for (s = node -> data -> data; *s; s++) {
    if (*s == '(') parens++;
    if (*s == ')') parens--;
    if (!parens) { // if not inside parens
      if (is_or(s)) {  // found an "or"
        PARSE_DEBUG(msgInfo << "Found an 'or'" << sendmsg;)
        *(s+1) = 0;  // get rid of it; this also terminates the left part
        *(s+2) = 0;
        Tree<ParseType> *left = new Tree<ParseType>;
        Tree<ParseType> *right = new Tree<ParseType>;
        left -> data -> data = node -> data -> data;
        left -> data -> op = DATA;
        right -> data -> data = s+3;
        right -> data -> op = DATA;
        node -> data -> op = OR;
        node -> left = left;
        node -> right = right;
        return parse_cmd(left) && parse_cmd(right);
      }
    }
  }  // end search for "or"s
  
  //  find any ANDs outside of parens
  PARSE_DEBUG(msgInfo << "Looking for an 'and'" << sendmsg;)
  parens = 0;
  for (s = node -> data -> data; *s; s++) {
    if (*s == '(') parens++;
    if (*s == ')') parens--;
    if (!parens) { // if not inside parens
      if (is_and(s)) {  // found an "and"
        PARSE_DEBUG(msgInfo << "Found an 'and'" << sendmsg;)
        *(s+1) = 0;  // get rid of the and
        *(s+2) = 0;
        *(s+3) = 0;
        Tree<ParseType> *left = new Tree<ParseType>;
        Tree<ParseType> *right = new Tree<ParseType>;
        left -> data -> data = node -> data -> data;
        left -> data -> op = DATA;
        right -> data -> data = s+4;
        right -> data -> op = DATA;
        node -> data -> op = AND;
        node -> left = left;
        node -> right = right;
        return parse_cmd(left) && parse_cmd(right);
      }
    }
  }  // end search for "and"s
  
  PARSE_DEBUG(msgInfo << "Looking for a 'not'" << sendmsg;)
  // now find a NOT
  // "not" must be the first non-space character, so only that position is
  // checked.  (Looking at every position would also pick up a "not" inside
  // a leading pair of parentheses and lose the parentheses.)
  for (s = node -> data -> data; *s && isspace((unsigned char) *s); s++) {
  }
  // when the three letters match, s[3] is at worst the terminating NUL
  if (!strncmp(s, "not", 3) && !isalnum((unsigned char) s[3])) {
    PARSE_DEBUG(msgInfo << "Found a 'not'" << sendmsg;)
    *(s+0) = 0;  // get rid of it
    *(s+1) = 0;
    *(s+2) = 0;
    Tree<ParseType> *left = new Tree<ParseType>;
    left -> data -> data = s+3;
    left -> data -> op = DATA;
    node -> data -> op = NOT;
    node -> left = left;
    node -> right = NULL;
    return parse_cmd(left);
  }
   
  PARSE_DEBUG(msgInfo << "Looking for parens" << sendmsg;)
  // next is parentheses -- there are no ands or ors on the outside
  for (s = node -> data -> data; *s; s++) {
    if (*s == '(') {  // find the first parens
      parens = 1;
      s++; // point to first character after the '('
      char *t;  // declared outside the 'for' because it is needed after it
      for (t = s; *t && parens>0; t++) {  // find matching parens
        if (*t == '(') parens++;
        if (*t == ')') parens--;
      }
      if (parens > 0) {  //oops!
        msgErr << "Unbalanced parens! (shouldn't get this error message!)";
        msgErr << sendmsg;
        return FALSE;
      }
      char *u = t;  // make sure the stuff after the parens is blank
      while (*u) {
        if (!isspace((unsigned char) *u)) { // nothing after the end parens!
          msgErr << "Unable to parse information after a ')'" << sendmsg;
          return FALSE;
        }
        u++;
      }
      // s points to the front, t points to 1 past the parens
      PARSE_DEBUG(msgInfo << "Found it" << sendmsg;)
      t--; // point t on the ')'
      *t = 0;  // terminate the end
      Tree<ParseType> *left = new Tree<ParseType>;
      left -> data -> data = s;
      left -> data -> op = DATA;
      node -> data -> op = PARENS;
      node -> left = left;
      node -> right = NULL;
      return parse_cmd(left);
    } else if (!isspace((unsigned char) *s)) {  //  parens MUST start the block
      break;
    }
  }
  
  // finally, the ()s, ands, ors, and nots are done; I just have a command
  
  PARSE_DEBUG(msgInfo << "Didn't find a binary operation" << sendmsg;)
  // get rid of leading whitespace
  for (s = node -> data -> data; *s && isspace((unsigned char) *s); s++) {
  }
  if (!*s) {
    msgErr << "You missed a selection" << sendmsg;
    return FALSE;
  }

  // is it "around <dist> of (<selection>)" ?
  // is it "within <dist> of (<selection>)" ?
  // The keyword has to be a word of its own: a successful 6 character
  // compare guarantees that s[6] exists, and it must be the end of the
  // string or whitespace.
  if ((!space_strncasecmp(s, "around", 6) ||  // either name is okay
       !space_strncasecmp(s, "within", 6)) &&
      (!s[6] || isspace((unsigned char) s[6]))) {
    PARSE_DEBUG(msgInfo << "Setting up an 'around' search" << sendmsg;)
    s+=6;  // step over the keyword only, so a bare keyword at the very end
           // of the string does not move s past the terminating NUL
    char *t = s;
    while (*t && isspace((unsigned char) *t)) { // spaces after "around"
      t++;
    }
    if (!*t) {
      msgErr << "Couldn't find distance after 'around'" << sendmsg;
      return FALSE;
    }
    char *u = t;
    while (*u && !isspace((unsigned char) *u)) {// get the size
      u++;
    }
    if (!*u) {
      msgErr << "Couldn't find the 'of' after 'around'" << sendmsg;
      return FALSE;
    }
    *u = 0;
    float f = atof(t);
    if (f <= 0) {
      msgErr << "around distance of " << f << " is invalid" << sendmsg;
      return FALSE;
    }
    u++;
    while (*u && isspace((unsigned char) *u)) { // look for the start of the of
      u++;
    }
    if (!*u || space_strncasecmp(u, "of", 2)) {
      msgErr << "Couldn't find the 'of' after finding the distance" << sendmsg;
      return FALSE;
    }
    // that's as far as I need to go
    // the distance is 't'
    // the rest of the selection is at 'u'
    Tree<ParseType> *left = new Tree<ParseType>;
    left -> data -> data = u+2;  // get past the of
    left -> data -> op = DATA;
    node -> data -> op = AROUND;
    node -> left = left;
    // Store the distance as this node's text.  The node's text and the
    // child's text share one buffer, so the formatted number may only use
    // the bytes in front of the child's text.  If "%f" would be too long
    // (e.g. the user typed "1e9"), keep the text the user typed instead;
    // it lies inside that region and so always fits.
    {
      char dist[64];  // ample for any float printed with "%f"
      sprintf(dist, "%f", f);
      int room = (int) ((u + 2) - node -> data -> data);
      if ((int) strlen(dist) + 1 <= room) {
        strcpy(node -> data -> data, dist);
      } else {
        memmove(node -> data -> data, t, strlen(t) + 1);
      }
    }
    return parse_cmd(left);
  }
  
  // find all atoms that are in the same X as the selection
  // Ex: same resname as within 5 of resname LYR
  // As above, the keyword has to be a word of its own.
  if (!space_strncasecmp(s, "same", 4) &&
      (!s[4] || isspace((unsigned char) s[4]))) {
    s+=4;  // step over the keyword only (see the 'around' case)
    char *t = s;
    while (*t && isspace((unsigned char) *t)) { // spaces after "same"
      t++;
    }
    if (!*t) {
      msgErr << "Couldn't find a name after 'same'" << sendmsg;
      return FALSE;
    }
    char *u = t;
    while (*u && !isspace((unsigned char) *u)) { // get the name
      u++;
    }
    if (!*u) {
      msgErr << "Couldn't find the 'as' after 'same'" << sendmsg;
      return FALSE;
    }
    *u = 0;
    // check that the name is valid; which one it is gets worked out again
    // from the stored text when the tree is evaluated
    if (space_strncasecmp(t, AtomSelName[RESNAME], CMDLEN) &&
        space_strncasecmp(t, AtomSelName[SEGNAME], CMDLEN) &&
        space_strncasecmp(t, AtomSelName[FRAGMENT], CMDLEN) &&
        space_strncasecmp(t, AtomSelName[PFRAGMENT], CMDLEN) &&
        space_strncasecmp(t, AtomSelName[NFRAGMENT], CMDLEN)) {
      msgErr << "Cannot understand '" << t << "' in the context of ";
      msgErr << "a 'same as' search." << sendmsg;
      return FALSE;
    }
    u++;
    while (*u && isspace((unsigned char) *u)) { // look for the start of the 'as'
      u++;
    }
    if (!*u || space_strncasecmp(u, "as", 2)) {
      msgErr << "Couldn't find the 'as' after finding the name" << sendmsg;
      return FALSE;
    }
    // Parsed
    Tree<ParseType> *left = new Tree<ParseType>;
    left -> data -> data = u+2;  // get past the as
    left -> data -> op = DATA;
    node -> data -> op = SAME;
    node -> left = left;
    // Move the name to the front of this node's text.  Source and
    // destination are in the same buffer and may overlap, so use memmove.
    memmove(node -> data -> data, t, strlen(t) + 1);
    return parse_cmd(left);
  }
    
  // otherwise it is boring: a single keyword, maybe with parameters
  int okay = FALSE;
  if(!space_strncasecmp(s, AtomSelName[ALL], CMDLEN)) {
    node -> data -> compare = ALL; okay = TRUE;
  } else if(!space_strncasecmp(s, AtomSelName[NONE], CMDLEN)) {
    // matched like every other keyword, so that "none" followed by
    // whitespace or parameters is handled the same way as the rest
    node -> data -> compare = NONE; okay = TRUE;
  } else if(!space_strncasecmp(s, AtomSelName[BACKBONE], CMDLEN)) {
    node -> data -> compare = BACKBONE; okay = TRUE;
  } else if(!space_strncasecmp(s, AtomSelName[PROTEIN], CMDLEN)) {
    node -> data -> compare = PROTEIN; okay = TRUE;
  } else if(!space_strncasecmp(s, "amino-acids", CMDLEN)) {
    node -> data -> compare = PROTEIN; okay = TRUE;
  } else if(!space_strncasecmp(s, AtomSelName[NUCLEIC], CMDLEN)) {
    node -> data -> compare = NUCLEIC; okay = TRUE;
  } else if(!space_strncasecmp(s, AtomSelName[WATERS], CMDLEN)) {
    node -> data -> compare = WATERS; okay = TRUE;
  } else if(!space_strncasecmp(s, AtomSelName[NAME], CMDLEN)) {
    node -> data -> compare = NAME; okay = TRUE;
  } else if(!space_strncasecmp(s, AtomSelName[TYPE], CMDLEN)) {
    node -> data -> compare = TYPE; okay = TRUE;
  } else if(!space_strncasecmp(s, AtomSelName[RESNAME], CMDLEN)) {
    node -> data -> compare = RESNAME; okay = TRUE;
  } else if(!space_strncasecmp(s, AtomSelName[RESTYPE], CMDLEN)) {
    node -> data -> compare = RESTYPE; okay = TRUE;
  } else if(!space_strncasecmp(s, AtomSelName[RESID], CMDLEN)) {
    node -> data -> compare = RESID; okay = TRUE;
  } else if(!space_strncasecmp(s, AtomSelName[SEGNAME], CMDLEN)) {
    node -> data -> compare = SEGNAME; okay = TRUE;
  } else if(!space_strncasecmp(s, AtomSelName[ID], CMDLEN)) {
    node -> data -> compare = ID; okay = TRUE;
  } else if(!space_strncasecmp(s, AtomSelName[FRAGMENT], CMDLEN)) {
    node -> data -> compare = FRAGMENT; okay = TRUE;
  } else if(!space_strncasecmp(s, AtomSelName[PFRAGMENT], CMDLEN)) {
    node -> data -> compare = PFRAGMENT; okay = TRUE;
  } else if(!space_strncasecmp(s, AtomSelName[NFRAGMENT], CMDLEN)) {
    node -> data -> compare = NFRAGMENT; okay = TRUE;
  } else {
    // unknown selection
    okay = FALSE;
  }
  if (!okay) {
    msgErr << "I have no idea how to do: " << s << sendmsg;
    return FALSE;
  }
  PARSE_DEBUG(msgInfo << "The command is:" << 
    AtomSelName[node->data->compare] << sendmsg;)
  // get the text after the first word
  while (*s && !isspace((unsigned char) *s)) {  // pass the word
    s++;
  }
  while (*s && isspace((unsigned char) *s)) {   // pass the spaces after the word
    s++;
  }
  
  // these keywords stand alone; all the others need at least one parameter
  int no_params = (node -> data -> compare == ALL ||
                   node -> data -> compare == NONE ||
                   node -> data -> compare == BACKBONE ||
                   node -> data -> compare == NUCLEIC ||
                   node -> data -> compare == WATERS ||
                   node -> data -> compare == PROTEIN);
  if (!*s && !no_params) {
    msgErr << "No parameters given for: " << node -> data -> data << sendmsg;
    return FALSE;
  }
  if (*s && no_params) {
     msgErr << "Parameters are not allowed for: " << AtomSelName[node->data->compare];
     msgErr << sendmsg;
     return FALSE;
  }
  // and save it: move the parameter text to the front of this node's text
  // (same buffer, possibly overlapping, hence memmove)
  memmove(node -> data -> data, s, strlen(s) + 1);
  
  // I'm done
  return TRUE;
}




// a "special" search contains wildcards (from X-PLOR);
// * matches any string
// % matches a single character
// # matches any number
// + matches any digit
// all other special characters are processed by regex
//
// Returns TRUE if 'str' contains any character that is neither
// alphanumeric nor a single quote, i.e. if it has to be matched as a
// pattern; returns FALSE otherwise (including for an empty string).
int AtomSel::has_special(char *str)
{
 for (char *s = str; *s; s++) {
     // the cast keeps characters with the high bit set from being passed
     // to isalnum() as negative values
     if (*s != '\'' && !isalnum((unsigned char) *s))
       return TRUE;
 }
 return FALSE;
}

// Given a wildcard pattern as described above, translate it into a regular
// expression anchored at both ends ('^' ... '$') and compile it with the
// regex utilities so that special_compare() can be used afterwards.
// Returns TRUE on success.  Returns FALSE, after reporting to msgErr, if
// the pattern is too long or the expression cannot be compiled.
//   replace * with .?*
//   replace % with .?
//   replace # with [0-9]*
//   replace + with [0-9]
//   everything else is copied unchanged
// NOTE(review): ".?*" and ".?" are kept exactly as the original code wrote
// them.  Whether they really mean "any string" / "a single character"
// depends on the regex dialect of regcomp()/re_comp() -- confirm.
int AtomSel::special_compile(char *pattern)
{
  char outs[500];  // it shouldn't ever be this large
  // One input character expands to at most 6 output characters, and the
  // closing '$' and the terminating NUL need 2 more; beyond this point
  // there is no longer room for all of that.
  char *last_safe = outs + sizeof(outs) - 8;
  char *t = outs;
  *t++ = '^'; // start at the beginning
  for (char *s = pattern; *s; s++) {
    if (t > last_safe) {
      msgErr << "Pattern is too long to match: " << pattern << sendmsg;
      return FALSE;
    }
    if (*s == '*') {
      *t++ = '.'; *t++ = '?'; *t++ = '*';                       // .?*
    } else if (*s == '%') {
      *t++ = '.'; *t++ = '?';                                   // .?
    } else if (*s == '#') {
      *t++ = '['; *t++ = '0'; *t++ = '-'; *t++ = '9'; 
      *t++ = ']'; *t++ ='*';                                    // [0-9]*
    } else if (*s == '+' ) {
      *t++ = '['; *t++ = '0'; *t++ = '-'; *t++ = '9'; *t++ =']'; // [0-9]
    } else {
      *t++ = *s;
    }
  }
  *t++ = '$'; // and end at the end
  *t = 0;     // the compile routines need a NUL terminated string
  // the string's been made, now compile it:
#ifdef ARCH_HPUX9
  {
    int i=regcomp(&re, outs, REG_EXTENDED);
    if (i != 0) {
      msgErr << "Couldn't make the expression to match: ";
      msgErr << pattern << sendmsg;
      char errs[256];
      (void) regerror(i, &re, errs, sizeof(errs));
      msgErr << errs << sendmsg;  // say why, as the other branch does
      return FALSE;
    }
  }
#else
  {
   char *errs = re_comp(outs);
   if (errs) {
     msgErr << "Couldn't make the expression to match: " << pattern << sendmsg;
     msgErr << errs << sendmsg;
     return FALSE;
   }
  }
#endif
   // compiled
  return TRUE;
}
// Match 'data' against the pattern most recently compiled by
// special_compile(); the pattern is assumed to have been compiled already.
// Returns non-zero if the data matches the pattern, 0 otherwise.
int AtomSel::special_compare(char *data)
{
#ifndef ARCH_HPUX9
  // re_exec returns 1 if there was a match
  const int status = re_exec(data);
  return status == 1;
#else
  // regexec returns 0 for match
  const int status = regexec(&re, data, (size_t) 0, NULL, 0);
  return !status;
#endif
}

// Release the compiled pattern.  Only the ARCH_HPUX9 build keeps the
// compiled pattern in 're' and has to free it; in the other build this
// function does nothing.
void AtomSel::special_free(void)
{
#ifndef ARCH_HPUX9
  // nothing to release in this build
#else
  regfree(&re);
#endif
}

// Evaluate the current parse tree for molecule 'm'.  The molecule is
// remembered in 'mol', one flag per atom is stored in 'on' for quick
// retrieval later, and the number of flagged atoms is stored in 'selected'.
// Returns the total number of atoms selected; returns 0 without changing
// anything if there is no molecule, the molecule has no atoms, or there is
// no parse tree.
int AtomSel::find(BaseMolecule *m) {
  // no molecule, or a molecule without atoms: nothing to select
  // (no warning is given for a missing molecule)
  if (!m || m->nAtoms < 1)
    return 0;

  PARSE_DEBUG(msgInfo << "Looking for selection among the " << m->nAtoms <<sendmsg;)
  
  MSGDEBUG(2,"AtomSel: Finding selected atoms for molecule " << m->id());
  MSGDEBUG(2," :" << sendmsg);

  if (!parse_tree) {
    msgErr << "No parse tree available" << sendmsg;
    return 0;
  }

  // the old flag array can only be reused if the atom count is unchanged
  if (on && mol && mol->nAtoms != m->nAtoms) {
    delete [] on;
    on = NULL;
  }

  // switch to the new molecule and make sure there is storage for it
  mol = m;
  if (!on) {
    on = new char[mol->nAtoms];
  }

  // go through the parse tree and put the results in 'on'
  PARSE_DEBUG(msgInfo << "*********parse_find******" << sendmsg;)
  parse_find( mol, mol->nAtoms, parse_tree, on);

  // how many were selected?
  int count = 0;
  for (int idx = 0; idx < mol->nAtoms; idx++) {
    if (on[idx])
      count++;
  }
  selected = count;

  MSGDEBUG(2,"         Atoms selected = " << selected << sendmsg);

  // return total number of selected atoms
  return selected;
}

//// specialized function to turn on all atoms in a given residue
void AtomSel::mark_atoms_given_residue(int residue, char *on)
{
  ResizeArray<int> *atoms = &(mol->residueList[residue]->atoms);
  for (int i= atoms->num()-1; i>=0; i--) {
     on[(*atoms)[i]] = TRUE;
  }
}


/////////////////////////// parse_find //////////////////////////
// go down the parse tree and find the atoms appropriate for this selection
void AtomSel::parse_find(BaseMolecule *mol, int n, Tree<ParseType> *node, 
  char *on)
{
  if (node -> data -> op == OR) { // or the searches from the left and right children
    PARSE_DEBUG(msgInfo << "Doing an OR descent" << sendmsg;)
    char *on1 = new char[n];
    char *on2 = new char[n];
    parse_find(mol, n, node->left, on1);
    parse_find(mol, n, node->right, on2);
    for (int i=n-1; i>=0; i--) {
      on[i] = on1[i] || on2[i];
    }
    delete [] on1;
    delete [] on2;
    return;
  }
  if (node -> data -> op == AND) {  // and the searches from the left and right children
    PARSE_DEBUG(msgInfo << "Doing an AND descent" << sendmsg;)
    char *on1 = new char[n];
    char *on2 = new char[n];
    parse_find(mol, n, node->left, on1);
    parse_find(mol, n, node->right, on2);
    for (int i=n-1; i>=0; i--) {
      on[i] = on1[i] && on2[i];
    }
    delete [] on1;
    delete [] on2;
    return;
  }
  if (node -> data -> op == NOT) {  // negate the result from the left child
    PARSE_DEBUG(msgInfo << "Doing a NOT descent" << sendmsg;)
    parse_find(mol, n, node->left, on);
    for (int i=n-1; i>=0; i--) {
      on[i] = !on[i];
    }
    return;
  }
  if (node -> data -> op == PARENS) {  // doesn't do anything, really
    PARSE_DEBUG(msgInfo << "Doing a PARENS descent" << sendmsg;)
    parse_find(mol, n, node->left, on);
    return;
  }
  
  // clear out the 'on' array
  // start off assuming everything is FALSE
  for (int i=n-1; i>=0; i--) {
    on[i] = FALSE;
  }
  
  // find the ones that are close to a sub-selection
  if (node -> data -> op == AROUND) { // doesn't search other molecules
    PARSE_DEBUG(msgInfo << "Doing an AROUND descent" << sendmsg;)
    if(mol->is_current()) {
      Timestep *ts = mol->current();
      char *on1 = new char[n];
      parse_find(mol, n, node->left, on1);
      float  x, y, z, dist;
      dist = atof(node -> data -> data);
      dist = dist * dist;
      PARSE_DEBUG(msgInfo << "Looking for atoms within " << dist << sendmsg;)
      for (int i=n-1; i>=0; i--) {      // for the ones in the selection
	if (on1[i]) {
          float *a1 = ts->pos + 3*i;
          for (int j=n-1; j>=0; j--) {      // find the ones that are close
            float *a2 = ts->pos + 3*j;
            x = a2[0] - a1[0];
            y = a2[1] - a1[1];
            z = a2[2] - a1[2];
            if (x*x+y*y+z*z <= dist) {      // and select them
               on[j] = TRUE;
             }
          }
	}
      }
      delete [] on1;
    }
    return;
  }
  
  // find the same search, for atoms that share the same X as the selection
  if (node -> data -> op == SAME) {
      int same;
      int okay = FALSE;
      char *t = node -> data -> data;
      if (!space_strncasecmp(t, AtomSelName[RESNAME], CMDLEN)) {
        okay = TRUE; same = RESNAME;
      } else if (!space_strncasecmp(t, AtomSelName[SEGNAME], CMDLEN)) {
        okay = TRUE; same = SEGNAME;
      } else if (!space_strncasecmp(t, AtomSelName[FRAGMENT], CMDLEN)) {
        okay = TRUE; same = FRAGMENT;
      } else if (!space_strncasecmp(t, AtomSelName[PFRAGMENT], CMDLEN)) {
        okay = TRUE; same = PFRAGMENT;
      } else if (!space_strncasecmp(t, AtomSelName[NFRAGMENT], CMDLEN)) {
        okay = TRUE; same = NFRAGMENT;
      }
      if (!okay) {
        msgErr << "Cannot understand '" << t << "' while evaluating ";
        msgErr << "a 'same' search -- this shouldn't happen." << sendmsg;
        return;
      }
      ResizeArray<int> nums(10);
      char *on1 = new char[n];
      parse_find(mol, n, node->left, on1);
      int i,j,num;
      if (same == RESNAME) {
        for (i=n-1; i>=0; i--) {
          if (on1[i]) {
            num = mol->atom(i)->uniq_resid;
            for (j=nums.num()-1; j>=0; j--) { // is it unique?
              if (nums[j] == num)
                break;
            }
            if (j < 0) {  // if so, add it
              nums.append(num);
            }
          }
        }  // found all the numbers, now search
        for (i=nums.num()-1; i>=0; i--) {
          mark_atoms_given_residue(nums[i], on);
        }
      } else if (same == SEGNAME) {
        for (i=n-1; i>=0; i--) {
          if (on1[i]) {
            num = mol->atom(i)->segnameindex;
            for (j=nums.num()-1; j>=0; j--) { // is it unique?
              if (nums[j] == num)
                break;
            }
            if (j < 0) {  // if so, add it
              nums.append(num);
            }
          }
        }  // found all the numbers, now search
        int tmpindex;
        for (j=n-1; j>=0; j--) {
          tmpindex = mol->atom(j)->segnameindex;
          for (i=nums.num()-1; i>=0; i--) {
            if ( tmpindex == nums[i]) {
              on[j] = TRUE;
              break;
            }
          }
        }
      } else if (same == FRAGMENT) {
        for (i=n-1; i>=0; i--) {
          if (on1[i]) {
            num = mol->atom(i)->fragment;
            for (j=nums.num()-1; j>=0; j--) { // is it unique?
              if (nums[j] == num)
                break;
            }
            if (j < 0) {  // if so, add it
              nums.append(num);
            }
          }
        }  // found all the numbers, now search
        int tmpindex;
        for (j=n-1; j>=0; j--) {
          tmpindex = mol->atom(j)->fragment;
          for (i=nums.num()-1; i>=0; i--) {
            if ( tmpindex == nums[i]) {
              on[j] = TRUE;
              break;
            }
          }
        }
      } else if (same == PFRAGMENT || same == NFRAGMENT) {
        int k;
        ResizeArray<Fragment *> *tmpfragList = (same == PFRAGMENT ? 
                 &(mol->pfragList) : &(mol->nfragList));
        for (i=tmpfragList->num()-1; i>=0; i--) { // go through the list
          for (j=(*tmpfragList)[i]->num()-1; j>=0; j--) { // of frags
            for (k=mol->residueList[(*tmpfragList)[i]->residues[j]]->
                   atoms.num()-1; k>=0; k--) { //check all the residues
              if (on1[j]) { // if this atom is turned on, rejoice
                break;
              }
            }
            if (k>=0) { // get out of all the loops
              break;
            }
          }
          if (k>=0) { // must have found an atom in this frag, so turn 'em on
            for (j=(*tmpfragList)[i]->num()-1; j>=0; j--) {
              mark_atoms_given_residue((*tmpfragList)[i]->residues[j], on);
            }
          }
        } // searched all the frags
      }
      delete [] on1;
      return;
    }

  
  if (node -> data -> op != DATA) {
    msgErr << "Strange operation in parse_find";
    return;
  }
  PARSE_DEBUG(msgInfo << "Found " << AtomSelName[node -> data -> compare] << sendmsg;)


  if (node -> data -> compare == ALL) {   // turn everything on
    for (int i=n-1; i>=0; i--) {
      on[i] = TRUE;
    }
    return;
  }
  if (node -> data -> compare == NONE) { // turn everything off
    return;
  }
  if (node -> data -> compare == BACKBONE) { // turn on the backbone
    for (int i=n-1; i>=0; i--) {
      on[i] = (mol -> atom(i) ->atomType != Atom::NORMAL);
    }
    return;
  }
  if (node -> data -> compare == PROTEIN) { // turn on the protein
    for (int i=n-1; i>=0; i--) {
      on[i] = (mol -> atom(i) ->residueType == Atom::PROTEIN);
    }
    return;
  }
  if (node -> data -> compare == NUCLEIC) { // turn on the nucleic acids
    for (int i=n-1; i>=0; i--) {
      on[i] = (mol -> atom(i) ->residueType == Atom::NUCLEIC);
    }
    return;
  }
  if (node -> data -> compare == WATERS) { // turn on the nucleic acids
    for (int i=n-1; i>=0; i--) {
      on[i] = (mol -> atom(i) ->residueType == Atom::WATERS);
    }
    return;
  }
  // all the rest have multiple fields
  // remember, everything by default is turned off at the start of this fctn

    char *field;
    PARSE_DEBUG(msgInfo << "Scanning -----:" << node -> data -> data << ":" << sendmsg;)
    
    Tokenize token(node -> data -> data, " ,;\t\n");
    PARSE_DEBUG(msgInfo << "There are " << token.num() << " elements" << sendmsg;)
    for (int toknum=0; toknum<token.num(); toknum++) {
      field = token.item(toknum);
      PARSE_DEBUG(msgInfo << "----------->>>>> Data: " << field << ":" << sendmsg;)
      if (has_special(field)) {
        PARSE_DEBUG(msgInfo << "Isn't that special?" << sendmsg;)
       if (special_compile(field)) {  // are there special (ie., regex) fields?
        switch (node -> data -> compare ) {
          case NAME: {
             for (int i=n-1; i>=0; i--) {
              if (special_compare(mol->atom(i)->namestr))
               on[i] = TRUE;
             }             
            }
            break;
          case TYPE: {
             for (int i=n-1; i>=0; i--) {
              if (special_compare(mol->atom(i)->typestr))
               on[i] = TRUE;
             }
            }            
            break;
          case RESNAME:
          case RESTYPE:{
             for (int i=n-1; i>=0; i--) {
              if (special_compare(mol->atom(i)->resnamestr))
               on[i] = TRUE;
             }
            }             
            break;
          case RESID:{
             for (int i=n-1; i>=0; i--) {
              if (special_compare(mol->atom(i)->residstr))
               on[i] = TRUE;
             }
            }             
            break;
          case SEGNAME:{
             for (int i=n-1; i>=0; i--) {
              if (special_compare(mol->atom(i)->segnamestr))
               on[i] = TRUE;
             }
            }         
            break;
          case ID:{
             char s[10];
             for (int i=n-1; i>=0; i--) { // this is REALLY SLOW
               sprintf(s, "%d", i);
               if (special_compare(s))
        	on[i] = TRUE;
             }
            }
            break;          
          case FRAGMENT:
          case PFRAGMENT:
          case NFRAGMENT: {
             char s[10];
             ResizeArray<Fragment *> *tmpfrag;
             switch(node->data->compare) {
               case FRAGMENT: tmpfrag = &(mol->fragList); break;
               case PFRAGMENT: tmpfrag = &(mol->pfragList); break;
               case NFRAGMENT: tmpfrag = &(mol->nfragList); break;
             }
             for (int i=tmpfrag->num()-1; i>=0; i--) { // REALLY SLOW
               sprintf(s, "%d", i);
               if (special_compare(s)) {
                 for (int j=(*tmpfrag)[i]->num()-1; j>=0; j--) {
                   mark_atoms_given_residue((*tmpfrag)[i]->
                                                    residues[j], on);
                 }
               }
             }
            }
            break;
          default:
            msgErr << "Unknown search type got into the 'name' search" << sendmsg;
        }
        special_free();
       } else { // couldn't make the regex
       // ignore it
       }
      } else { // not special, so I can do a faster comparison
        PARSE_DEBUG(msgInfo << "Bloomington-Normal" << sendmsg;)
        int idx, i;
        switch (node -> data -> compare ) {
          case NAME:
            idx = mol->atomNames.typecode(field);
            for ( i=n-1; i>=0; i--) {
             if (mol->atom(i)->nameindex == idx)
              on[i] = TRUE;
            }             
            break;
          case TYPE:
            idx = mol->atomTypes.typecode(field);
            for ( i=n-1; i>=0; i--) {
             if (mol->atom(i)->typeindex == idx)
              on[i] = TRUE;
            }             
            break;
          case RESNAME:
          case RESTYPE:
            idx = mol->resNames.typecode(field);
            for ( i=n-1; i>=0; i--) {
             if (mol->atom(i)->resnameindex == idx)
              on[i] = TRUE;
            }             
            break;
          case RESID:
            idx = mol->resIds.typecode(field);
            for ( i=n-1; i>=0; i--) {
             if (mol->atom(i)->residindex == idx)
              on[i] = TRUE;
            }             
            break;
          case SEGNAME:
            idx = mol->segNames.typecode(field);
            for ( i=n-1; i>=0; i--) {
             if (mol->atom(i)->segnameindex == idx)
              on[i] = TRUE;
            }             
            break;
          case ID:
            idx = atoi(field);
            if (idx >=0 && idx < n)
              on[idx] = TRUE;
            break;
          case FRAGMENT:
          case PFRAGMENT:
          case NFRAGMENT:{
            ResizeArray<Fragment *> *tmpfrag;
            switch(node->data->compare) {
               case FRAGMENT: tmpfrag = &(mol->fragList); break;
               case PFRAGMENT: tmpfrag = &(mol->pfragList); break;
               case NFRAGMENT: tmpfrag = &(mol->nfragList); break;
            }
            idx = atoi(field);
            if (idx >= 0 && idx < tmpfrag->num() ) {
              for ( i=(*tmpfrag)[idx]->num()-1; i>=0; i--) {
                mark_atoms_given_residue((*tmpfrag)[idx]->residues[i], on);
              }
            }
            break;
          }
          default:
            msgErr << "Unknown search type got into the 'name' search" << sendmsg;
        }
      }
      
    }  // end of loop over all tokens
  PARSE_DEBUG(msgInfo << "Leaving the loop" << sendmsg;)
  return;
}


/* REVISION HISTORY:********************************************************
 *
 * $Log: AtomSel.C,v $
 * Revision 1.13  1995/05/11  21:36:17  billh
 * Moved log message to end of file.
 *
 * Revision 1.12  95/03/24  18:47:05  billh
 * Added copyright notice to top of file; made sure all virtual routines
 * are defined in the .C file, not in the .h file.
 * 
 * Revision 1.11  1994/11/27  12:43:26  dalke
 * added searches for nfrag and pfrag
 *
 * Revision 1.10  1994/11/26  08:06:43  dalke
 * Completed fragment selection
 *
 * Revision 1.9  1994/11/26  07:36:42  dalke
 * Added a "same" search, as in, "same resname as <selection>"
 *
 * Revision 1.8  1994/11/25  20:39:29  dalke
 * added search for waters
 *
 * Revision 1.7  1994/11/25  13:17:02  dalke
 * Added searches by "protein" (find protein residues) and "nucleic"
 * (find nucleic acid residues)
 *
 * Revision 1.6  1994/11/12  10:24:13  dalke
 * Major revision for "powerful" atom selection
 *
 * Revision 1.5  1994/11/02  08:36:28  billh
 * Added possible 'not' as first word in selection
 *
 * Revision 1.4  1994/11/02  01:33:43  billh
 * Now can have multiple selections.  Still have problem of not seeing
 * resid selections correctly.
 *
 * Revision 1.3  94/10/31  20:33:34  billh
 * Added 'NONE' selection option, and fixed problem with error message disp.
 * 
 * Revision 1.2  1994/10/26  23:19:44  billh
 * Removed 'ok' var; removed initial string in constructor; added 'cmd_parse'
 * and operator= routines, as well as 'change' routine.
 *
 * Revision 1.1  94/09/23  06:01:39  billh
 * Initial revision
 * 
 ***************************************************************************/
