
%{


/***************************************************************************
 *cr                                                                       
 *cr            (C) Copyright 1995 The Board of Trustees of the           
 *cr                        University of Illinois                       
 *cr                         All Rights Reserved                        
 *cr                                                                   
 ***************************************************************************/

/***************************************************************************
 * RCS INFORMATION:
 *
 *	$RCSfile: AtomLexer.l,v $
 *	$Author: dalke $	$Locker:  $		$State: Exp $
 *	$Revision: 1.4 $	$Date: 1995/05/23 20:38:46 $
 *
 ***************************************************************************
 * DESCRIPTION:
 *
 * break atom selection information into its tokens
 *
 ***************************************************************************/



#include <stdlib.h>
#include <strings.h>
#include "AtomParser.h"
#include "y.tab.h"
#include "Inform.h"

/*
 * Redefine the scanner's character-level I/O so that it reads from the
 * in-memory string atomparser_yystring instead of a stream.
 *   input()  - yields the character under the pointer, then advances it.
 *   unput(c) - moves the pointer back one position and stores c there,
 *              so the next input() yields c.  Note that this writes into
 *              the buffer being scanned.
 * The argument of unput is parenthesized so that any expression may be
 * passed without precedence surprises.
 */
#undef input
#undef unput
#define input() (*atomparser_yystring++)
#define unput(c) (*--atomparser_yystring = (c))

%}

%{
 /* Basic character classes and the floating point pattern.
    float accepts "12.", "12.5" and ".5"; the forms that have digits
    after the decimal point may also carry an exponent such as
    e10, E-3 or e+7 (so "1.5e10" is a single FLOAT token).
  */
%}
numeric		[0-9]
alpha		[a-zA-Z_]
alphanum	({alpha}|{numeric})
float		([0-9]+\.|([0-9]*\.[0-9]+)([eE][-+]?[0-9]+)?)
%{
 /* like_1A  catches raw words like 1A, 3', 5*A
		(starts with a number and contains an alpha or ', ", or *)
    like_C5' (spelled like_C5prime below) catches ones like C5', O*, O5*
		(starts with an alpha and contains a ', ", or *)
  */
 /*  The problem is that * is too easy to confuse with the multiplication
   operator, e.g. "x* x".  I won't worry about this until the next rewrite;
   for now only the quote characters (primes) are allowed, so the
   original definition stays disabled:
wierd		[\'\"\*]
  */

%}

%{
 /* wierd        - a single or a double quote character
    like_1A      - digits first, then at least one letter, underscore or
                   quote, then any mix of letters, digits, underscores
                   and quotes
    like_C5prime - a letter or underscore first, optionally more letters,
                   digits or underscores, then at least one quote, then
                   any mix of letters, digits, underscores and quotes
  */
%}
wierd		[\'\"]
like_1A		{numeric}+({alpha}|{wierd})({alphanum}|{wierd})*
like_C5prime	{alpha}{alphanum}*{wierd}({alphanum}|{wierd})*
		
%%
"("		{ /* parentheses are returned as themselves */ return '('; }
")"		{ return ')'; }
and|"&&"	{ /* boolean operators: word form or symbol form */ return AND; }
or|"||"		{ return OR; }
not		{ return NOT; }
within		{ /* reserved words of the selection language */ return WITHIN; }
of		{ return OF; }
same		{ return SAME; }
as		{ return AS; }
to|"..."	{ /* both spellings give the THROUGH token */ return THROUGH; }
where		{ return WHERE; }

":"		{ return RANGE; }
{float}		{ /* floating point literal: value goes into yylval.dval */
		  const char *text = (char *) yytext;
		  yylval.dval = atof(text);
		  return FLOAT;
		}
\"([^\"]|\\\\|\\\")*\"	{ /* double-quoted string; \\ and \" inside it are escapes */
		  /* the token is handed to the parser as a WORD node */
		  yylval.node = new atomparser_node(WORD);
		  /* copy the matched text, skipping the opening quote */
		  yylval.node->sele.s = ((char *) yytext)+1;
		  /* GNU needs a "chop": assign "" to the one-character piece
		     at index length()-1, i.e. the closing quote.
		     NOTE(review): relies on the string class's
		     substring-assignment semantics -- confirm */
		  yylval.node->sele.s(
			(int) (yylval.node->sele.s).length()-1, 1
					) = "";
		  /* replace \" with " */
		  yylval.node->sele.s.gsub("\\\"", "\"");
		  /* replace \\ with \ */
		  yylval.node->sele.s.gsub("\\\\", "\\");
		  /* remember that the word came from a double-quoted string */
		  yylval.node->sele.st = DQ_STRING;
		  return WORD;
		}
\'([^\']|\\\\|\\\')*\'	{ /* single-quoted string; \\ and \' inside it are escapes */
		  /* for instance, this lets you do 'C5\''  */
		  /* the token is handed to the parser as a WORD node */
		  yylval.node = new atomparser_node(WORD);
		  /* copy the matched text, skipping the opening quote */
		  yylval.node->sele.s = ((char *) yytext)+1;
		  /* GNU needs a "chop": assign "" to the one-character piece
		     at index length()-1, i.e. the closing quote.
		     NOTE(review): relies on the string class's
		     substring-assignment semantics -- confirm */
		  yylval.node->sele.s(
			(int) (yylval.node->sele.s).length()-1, 1
					) = "";
		  /* replace \' with ' */
		  yylval.node->sele.s.gsub("\\'", "'");
		  /* replace \\ with \ */
		  yylval.node->sele.s.gsub("\\\\", "\\");
		  /* remember that the word came from a single-quoted string */
		  yylval.node->sele.st = SQ_STRING;
		  return WORD;
		}
\"[^\"]*	{ /* an opening " that the quoted-string rule could not close */
		  msgErr << "Unterminated double quoted string: " << (char *) yytext << sendmsg;
		  return ERROR;
		}
\'[^\']*	{ /* an opening ' that the quoted-string rule could not close */
		  msgErr << "Unterminated single quoted string: " << (char *) yytext << sendmsg;
		  return ERROR;
		}

"<"		{ /* numeric comparisons */ return NLT; }
"<="		{ return NLE; }
"=="|"="	{ /* a single = is accepted too; many people use it */ return NEQ; }
">="		{ return NGE; }
">"		{ return NGT; }
"!="		{ return NNE; }

"lt"		{ /* string comparisons */ return SLT; }
"le"		{ return SLE; }
"eq"		{ return SEQ; }
"ge"		{ return SGE; }
"gt"		{ return SGT; }
"ne"		{ return SNE; }
"~="		{ return MATCH; }

"+"		{ /* arithmetic operators */ return ADD; }
"-"		{ return SUB; }
"/"		{ return DIV; }
"*"		{ return MULT; }
"%"		{ return MOD; }
"^"|"**"	{ /* two spellings of the EXP token */ return EXP; }

[ \t\n]		{ /* blanks, tabs and newlines produce no token */ }

{like_1A}|{like_C5prime} { /* raw names such as 5' 1A C4' 3'A */
		  atomparser_node *raw = new atomparser_node(WORD);
		  raw->sele.s = (char *) yytext;
		  raw->sele.st = RAW_STRING;
		  yylval.node = raw;
		  return WORD;
		}
{numeric}+	{ /* plain run of digits: value goes into yylval.ival */
		  const char *digits = (char *) yytext;
		  yylval.ival = atoi(digits);
		  return INT;
		}
{alpha}{alphanum}* {  /* ordinary identifiers */
		  /* every identifier starts out as a raw WORD node */
		  atomparser_node *word = new atomparser_node(WORD);
		  word->sele.s = (char *) yytext;
		  word->sele.st = RAW_STRING;
		  yylval.node = word;

		  /* ask the symbol table; a negative index means the
		     name is not a known symbol and stays a plain WORD */
		  int name_len = strlen((char *) yytext);
		  int sym = atomparser_yylookup((char *) yytext, name_len);
		  if (sym < 0)
		     return WORD;

		  /* known symbol: record its index, then pick the token
		     that corresponds to the symbol's kind */
		  word->extra_type = sym;
		  SymbolTableName *entry = atomparser_symbols[sym];
		  if (entry->is_a == SymbolTableName::FUNCTION) {
		     word->node_type = FUNC;
		     return FUNC;
		  }
		  if (entry->is_a == SymbolTableName::KEYWORD) {
		     word->node_type = KEY;
		     return KEY;
		  }
		  if (entry->is_a == SymbolTableName::SINGLEWORD) {
		     word->node_type = SINGLE;
		     return SINGLE;
		  }
		  if (entry->is_a == SymbolTableName::STRINGFCTN) {
		     word->node_type = STRFCTN;
		     return STRFCTN;
		  }
		  /* any other kind is still reported as a WORD */
		  return WORD;
		}
.		{ /* any character that no earlier rule accepted:
		     report its numeric code and the character itself */
		  msgErr << "Bad character:" << int(*yytext) << ':' << *yytext << sendmsg;
		  return ERROR;
		}
%%

#include "SymbolTable.h"

// Search atomparser_symbols, in order, for the first entry whose regex
// reports a match (anything other than -1) for the text s of length len.
// Returns the index of that entry, or -1 when no entry matches.
int atomparser_yylookup(const char *s, int len)
{
   int idx = 0;
   while (idx < atomparser_numsymbols) {
      if (atomparser_symbols[idx]->regex->match(s, len) != -1)
         return idx;
      ++idx;
   }
   return -1;
}
      

// cursor into the string being scanned; the input() and unput() macros
// advance it and move it back
char *atomparser_yystring = 0;

// table of symbols searched by atomparser_yylookup, and its entry count
SymbolTableName **atomparser_symbols = 0;
int atomparser_numsymbols = 0;



