/* ======================================================================
                             lexer.c

  See comments in soar.h for an overview.
====================================================================== */

#include <ctype.h>
#include <errno.h>
#include "soar.h"

bool constituent_char[256];   /* is the character a symbol constituent? */
bool whitespace[256];         /* is the character whitespace? */
bool number_starters[256];    /* could the character initiate a number? */

struct lexeme_info lexeme;  /* global variable holding the current lexeme */

#define BUFSIZE (MAX_LEXER_LINE_LENGTH+2) /* +2 for newline and null at end */

/* --- we'll use one of these structures for each file being read --- */
typedef struct lexer_source_file_struct {
  struct lexer_source_file_struct *parent_file;
  char *filename;
  FILE *file;
  bool fake_rparen_at_eol;
  bool allow_ids;
  int parentheses_level;    /* 0 means top level, no left paren's seen */
  int current_column;       /* column number of next char to read (0-based) */
  unsigned long current_line;   /* line number of line in buffer (1-based) */
  int column_of_start_of_last_lexeme;   /* (used for error messages) */
  unsigned long line_of_start_of_last_lexeme;
  char buffer[BUFSIZE];              /* holds text of current input line */
  struct lexeme_info saved_lexeme;   /* save/restore it during nested loads */
  char saved_current_char;           /* save/restore this too */
} lexer_source_file;

lexer_source_file *current_file = NIL;  /* file we're currently working on */
#define reading_from_top_level() (! current_file->parent_file)

char current_char;                 /* holds current input character */

/* ======================================================================
                       Start/Stop Lex from File
                       
  The lexer maintains a stack of files being read, in order to handle nested
  loads.  Start_lex_from_file() and stop_lex_from_file() push and pop the
  stack.  Immediately after start_lex_from_file(), the current lexeme (global
  variable) is undefined.  Immediately after stop_lex_from_file(), the 
  current lexeme is automatically restored to whatever it was just before
  the corresponding start_lex_from_file() call.
====================================================================== */

void start_lex_from_file (char *filename, FILE *already_opened_file) {
  lexer_source_file *lsf;

  lsf = allocate_memory (sizeof(lexer_source_file),
                         MISCELLANEOUS_MEM_USAGE);
  lsf->saved_lexeme = lexeme;
  lsf->saved_current_char = current_char;
  lsf->parent_file = current_file;
  current_file = lsf;
  lsf->filename = make_memory_block_for_string (filename);
  lsf->file = already_opened_file;
  lsf->fake_rparen_at_eol = FALSE;
  lsf->allow_ids = TRUE;
  lsf->parentheses_level = 0;
  lsf->column_of_start_of_last_lexeme = 0;
  lsf->line_of_start_of_last_lexeme = 0;
  lsf->current_line = 0;
  lsf->current_column = 0;
  lsf->buffer[0] = 0;
  current_char = ' ';   /* whitespace--to force immediate read of first line */
}

void stop_lex_from_file (void) {
  lexer_source_file *lsf;

  if (reading_from_top_level()) {
    print ("Internal error: tried to stop_lex_from_file at top level\n");
    return;
  }
  lsf = current_file;
  current_file = current_file->parent_file;
  current_char = lsf->saved_current_char;
  lexeme = lsf->saved_lexeme;

  free_memory_block_for_string (lsf->filename);
  free_memory (lsf, MISCELLANEOUS_MEM_USAGE);
}

/* ======================================================================
                             Get next char

  Get_next_char() gets the next character from the current input file and
  puts it into the global variable current_char.
====================================================================== */

void get_next_char (void) {
  char *s;
  
  current_char = current_file->buffer [current_file->current_column++];
  if (current_char) return;

  if ((current_file->current_column == BUFSIZE) &&
      (current_file->buffer[BUFSIZE-2] != '\n') &&
      (current_file->buffer[BUFSIZE-2] != EOF_AS_CHAR)) {
    print ("Error:  line too long (max allowed is %d chars)\n",
           MAX_LEXER_LINE_LENGTH);
    print ("File %s, line %lu\n", current_file->filename,
           current_file->current_line);
    abort_with_fatal_error();
  }

  s = fgets (current_file->buffer, BUFSIZE, current_file->file);
  
  if (s) {
    current_file->current_line++;
    if (reading_from_top_level()) {
      tell_printer_that_output_column_has_been_reset ();
      if (logging_to_file)
        print_string_to_log_file_only (current_file->buffer);
    }
  } else {
    /* s==NIL means immediate eof encountered or read error occurred */
    if (! feof(current_file->file)) {
      if(reading_from_top_level()) {
        control_c_handler();
        return;
      } else {
        print ("I/O error while reading file %s; ignoring the rest of it.\n",
               current_file->filename);
      }
    }
    current_file->buffer[0] = EOF_AS_CHAR;
    current_file->buffer[1] = 0;
  }
  current_char = current_file->buffer[0];
  current_file->current_column = 1;
}

/* ======================================================================

                         Lexer Utility Routines

====================================================================== */

#define record_position_of_start_of_lexeme() { \
  current_file->column_of_start_of_last_lexeme = \
    current_file->current_column - 1; \
  current_file->line_of_start_of_last_lexeme = \
    current_file->current_line; }

#define store_and_advance() { \
  lexeme.string[lexeme.length++] = (isupper((char)current_char) ? \
                                    tolower((char)current_char) : \
                                    (char)current_char); \
  get_next_char(); }

#define finish() { lexeme.string[lexeme.length]=0; }

void read_constituent_string (void) {
  while ((current_char!=EOF_AS_CHAR) &&
         constituent_char[(unsigned char)current_char])
    store_and_advance();
  finish();
}  

void read_rest_of_floating_point_number (void) {
  /* --- at entry, current_char=="."; we read the "." and rest of number --- */
  store_and_advance();
  while (isdigit(current_char)) store_and_advance(); /* string of digits */
  if ((current_char=='e')||(current_char=='E')) {
    store_and_advance();                             /* E */
    if ((current_char=='+')||(current_char=='-'))
      store_and_advance();                       /* optional leading + or - */
    while (isdigit(current_char)) store_and_advance(); /* string of digits */
  }
  finish();
}

/* --- BUGBUG: these routines are here because the ANSI routine strtod() isn't
   available at CMU and the ANSI routine strtoul() isn't at ISI --- */
#ifndef __NeXT__
extern double atof();
#endif /* #ifndef __NeXT__ */

double my_strtod (char *ch, char **p, int base) {
  /* BUGBUG without ANSI's strtod(), there's no way to check for floating
     point overflow here.  If someone types "1.5E2000" weird things could
     happen. */
  return atof(ch);
}

unsigned long my_strtoul (char *ch, char **p, int base) {
  long result;
  
  errno = 0;
  result = strtol (ch,p,base);
  if (errno) return 0;
  if (result < 0) {
    errno = ERANGE;
    return 0;
  }
  return (unsigned long) result;
}

void determine_type_of_constituent_string (void) {
  bool possible_id, possible_var, possible_sc, possible_ic, possible_fc;
  bool rereadable;

  determine_possible_symbol_types_for_string (lexeme.string,
                                              lexeme.length,
                                              &possible_id,
                                              &possible_var,
                                              &possible_sc,
                                              &possible_ic,
                                              &possible_fc,
                                              &rereadable);

  /* --- check whether it's a variable --- */
  if (possible_var) {
    lexeme.type = VARIABLE_LEXEME;
    return;
  }

  /* --- check whether it's an integer --- */
  if (possible_ic) {
    errno = 0;
    lexeme.type = INT_CONSTANT_LEXEME;
    lexeme.int_val = strtol (lexeme.string,NULL,10);
    if (errno) {
      print ("Error: bad integer (probably too large)\n");
      print_location_of_most_recent_lexeme();
      lexeme.int_val = 0;
    }
    return;
  }
    
  /* --- check whether it's a floating point number --- */
  if (possible_fc) {
    errno = 0;
    lexeme.type = FLOAT_CONSTANT_LEXEME;
    lexeme.float_val = my_strtod (lexeme.string,NULL,10); 
    if (errno) {
      print ("Error: bad floating point number\n");
      print_location_of_most_recent_lexeme();
      lexeme.float_val = 0.0;
    }
    return;
  }
  
  /* --- check if it's an identifier --- */
  if (current_file->allow_ids && possible_id) {
    lexeme.id_letter = toupper(lexeme.string[0]);
    errno = 0;
    lexeme.type = IDENTIFIER_LEXEME;
    lexeme.id_number = my_strtoul (&(lexeme.string[1]),NULL,10);
    if (errno) {
      print ("Error: bad number for identifier (probably too large)\n");
      print_location_of_most_recent_lexeme();
      lexeme.id_number = 0;
    }
    return;
  }

  /* --- otherwise it must be a symbolic constant --- */
  if (possible_sc) {
    lexeme.type = SYM_CONSTANT_LEXEME;
    if (sysparams[PRINT_WARNINGS_SYSPARAM]) {
      if (lexeme.string[0] == '<') {
        if (lexeme.string[1] == '<') {
           print ("Warning: Possible disjunctive encountered in reading symbolic constant\n");
           print ("         If a disjunctive was intended, add a space after <<\n");
           print ("         If a constant was intended, surround constant with vertical bars\n");
           print_location_of_most_recent_lexeme();
	 } else {
           print ("Warning: Possible variable encountered in reading symbolic constant\n");
           print ("         If a constant was intended, surround constant with vertical bars\n");
           print_location_of_most_recent_lexeme();
         }
      } else {
        if (lexeme.string[lexeme.length-1] == '>') {
          if (lexeme.string[lexeme.length-2] == '>') {
           print ("Warning: Possible disjunctive encountered in reading symbolic constant\n");
           print ("         If a disjunctive was intended, add a space before >>\n");
           print ("         If a constant was intended, surround constant with vertical bars\n");
           print_location_of_most_recent_lexeme();
	 } else {
           print ("Warning: Possible variable encountered in reading symbolic constant\n");
           print ("         If a constant was intended, surround constant with vertical bars\n");
           print_location_of_most_recent_lexeme();
         }
	}
      }
    }
    return;
  }

  print ("Internal error: can't determine_type_of_constituent_string\n");
  abort_with_fatal_error();
}

void do_fake_rparen (void) {
  record_position_of_start_of_lexeme();
  lexeme.type = R_PAREN_LEXEME;
  lexeme.length = 1;
  lexeme.string[0] = ')';
  lexeme.string[1] = 0;
  if (current_file->parentheses_level > 0) current_file->parentheses_level--;
  current_file->fake_rparen_at_eol = FALSE;
}

/* ======================================================================
                        Lex such-and-such Routines

  These routines are called from get_lexeme().  Which routine gets called
  depends on the first character of the new lexeme being read.  Each routine's
  job is to finish reading the lexeme and store the necessary items in 
  the global variable "lexeme".
====================================================================== */

void (*(lexer_routines[256]))(void);

void lex_eof (void) {
  if (current_file->fake_rparen_at_eol) {
    do_fake_rparen();
    return;
  }
  store_and_advance();
  finish();
  lexeme.type = EOF_LEXEME;
}

void lex_at (void) {
  store_and_advance();
  finish();
  lexeme.type = AT_LEXEME;
}

void lex_tilde (void) {
  store_and_advance();
  finish();
  lexeme.type = TILDE_LEXEME;
}

void lex_up_arrow (void) {
  store_and_advance();
  finish();
  lexeme.type = UP_ARROW_LEXEME;
}

void lex_lbrace (void) {
  store_and_advance();
  finish();
  lexeme.type = L_BRACE_LEXEME;
}

void lex_rbrace (void) {
  store_and_advance();
  finish();
  lexeme.type = R_BRACE_LEXEME;
}

void lex_exclamation_point (void) {
  store_and_advance();
  finish();
  lexeme.type = EXCLAMATION_POINT_LEXEME;
}

void lex_comma (void) {
  store_and_advance();
  finish();
  lexeme.type = COMMA_LEXEME;
}

void lex_equal (void) {
  /* Lexeme might be "=", or symbol */
  /* Note: this routine relies on = being a constituent character */

  read_constituent_string();
  if (lexeme.length==1) { lexeme.type = EQUAL_LEXEME; return; }
  determine_type_of_constituent_string();
}

void lex_ampersand (void) {
  /* Lexeme might be "&", or symbol */
  /* Note: this routine relies on & being a constituent character */

  read_constituent_string();
  if (lexeme.length==1) { lexeme.type = AMPERSAND_LEXEME; return; }
  determine_type_of_constituent_string();
}

void lex_lparen (void) {
  store_and_advance();
  finish();
  lexeme.type = L_PAREN_LEXEME;
  current_file->parentheses_level++;
}

void lex_rparen (void) {
  store_and_advance();
  finish();
  lexeme.type = R_PAREN_LEXEME;
  if (current_file->parentheses_level > 0) current_file->parentheses_level--;
}

void lex_greater (void) {
  /* Lexeme might be ">", ">=", ">>", or symbol */
  /* Note: this routine relies on =,> being constituent characters */

  read_constituent_string();
  if (lexeme.length==1) { lexeme.type = GREATER_LEXEME; return; }
  if (lexeme.length==2) {
    if (lexeme.string[1]=='>') { lexeme.type = GREATER_GREATER_LEXEME; return;}
    if (lexeme.string[1]=='=') { lexeme.type = GREATER_EQUAL_LEXEME; return; }
  }
  determine_type_of_constituent_string();
}
    
void lex_less (void) {
  /* Lexeme might be "<", "<=", "<=>", "<>", "<<", or variable */
  /* Note: this routine relies on =,<,> being constituent characters */

  read_constituent_string();
  if (lexeme.length==1) { lexeme.type = LESS_LEXEME; return; }
  if (lexeme.length==2) {
    if (lexeme.string[1]=='>') { lexeme.type = NOT_EQUAL_LEXEME; return; }
    if (lexeme.string[1]=='=') { lexeme.type = LESS_EQUAL_LEXEME; return; }
    if (lexeme.string[1]=='<') { lexeme.type = LESS_LESS_LEXEME; return; }
  }
  if (lexeme.length==3) {
    if ((lexeme.string[1]=='=')&&(lexeme.string[2]=='>'))
      { lexeme.type = LESS_EQUAL_GREATER_LEXEME; return; }
  }
  determine_type_of_constituent_string();

}

void lex_period (void) {
  store_and_advance();
  finish();
  /* --- if we stopped at '.', it might be a floating-point number, so be
     careful to check for this case --- */
  if (isdigit(current_char)) read_rest_of_floating_point_number();
  if (lexeme.length==1) { lexeme.type = PERIOD_LEXEME; return; }
  determine_type_of_constituent_string();
}

void lex_plus (void) {
  /* Lexeme might be +, number, or symbol */
  /* Note: this routine relies on various things being constituent chars */
  int i;
  bool could_be_floating_point;
  
  read_constituent_string();
  /* --- if we stopped at '.', it might be a floating-point number, so be
     careful to check for this case --- */
  if (current_char=='.') {
    could_be_floating_point = TRUE;
    for (i=1; i<lexeme.length; i++)
      if (! isdigit(lexeme.string[i])) could_be_floating_point = FALSE;
    if (could_be_floating_point) read_rest_of_floating_point_number();
  }
  if (lexeme.length==1) { lexeme.type = PLUS_LEXEME; return; }
  determine_type_of_constituent_string();
}
      
void lex_minus (void) {
  /* Lexeme might be -, -->, number, or symbol */
  /* Note: this routine relies on various things being constituent chars */
  int i;
  bool could_be_floating_point;

  read_constituent_string();
  /* --- if we stopped at '.', it might be a floating-point number, so be
     careful to check for this case --- */
  if (current_char=='.') {
    could_be_floating_point = TRUE;
    for (i=1; i<lexeme.length; i++)
      if (! isdigit(lexeme.string[i])) could_be_floating_point = FALSE;
    if (could_be_floating_point) read_rest_of_floating_point_number();
  }
  if (lexeme.length==1) { lexeme.type = MINUS_LEXEME; return; }
  if (lexeme.length==3) {
    if ((lexeme.string[1]=='-')&&(lexeme.string[2]=='>'))
      { lexeme.type = RIGHT_ARROW_LEXEME; return; }
  }
  determine_type_of_constituent_string();
}

void lex_digit (void) {
  int i;
  bool could_be_floating_point;

  read_constituent_string();
  /* --- if we stopped at '.', it might be a floating-point number, so be
     careful to check for this case --- */
  if (current_char=='.') {
    could_be_floating_point = TRUE;
    for (i=1; i<lexeme.length; i++)
      if (! isdigit(lexeme.string[i])) could_be_floating_point = FALSE;
    if (could_be_floating_point) read_rest_of_floating_point_number();
  }
  determine_type_of_constituent_string();
}

void lex_unknown (void) {
  if(reading_from_top_level() && current_char == 0)
    print("\nSoar> ");
  else {
    print ("Error:  Unknown character encountered by lexer, code=%d\n", 
           current_char);
    print ("File %s, line %lu, column %lu.\n", current_file->filename,
           current_file->current_line, current_file->current_column);
  }
  get_next_char();
  get_lexeme();
}

void lex_constituent_string (void) {
  read_constituent_string();
  determine_type_of_constituent_string();
}

void lex_vbar (void) {
  lexeme.type = SYM_CONSTANT_LEXEME;
  get_next_char();
  do {
    if ((current_char==EOF_AS_CHAR)||(lexeme.length==MAX_LEXEME_LENGTH)) {
      print ("Error:  opening '|' without closing '|'\n");
      print_location_of_most_recent_lexeme();
      /* BUGBUG if reading from top level, don't want to signal EOF */
      lexeme.type = EOF_LEXEME;
      lexeme.string[0]=EOF_AS_CHAR;
      lexeme.string[1]=0;
      lexeme.length = 1;
      return;
    }
    if (current_char=='\\') {
      get_next_char();
      lexeme.string[lexeme.length++] = (char)current_char;
      get_next_char();
    } else if (current_char=='|') {
      get_next_char();
      break;
    } else {
      lexeme.string[lexeme.length++] = (char)current_char;
      get_next_char();
    }
  } while(TRUE);
  lexeme.string[lexeme.length]=0;
}

void lex_quote (void) {
  lexeme.type = QUOTED_STRING_LEXEME;
  get_next_char();
  do {
    if ((current_char==EOF_AS_CHAR)||(lexeme.length==MAX_LEXEME_LENGTH)) {
      print ("Error:  opening '\"' without closing '\"'\n");
      print_location_of_most_recent_lexeme();
      /* BUGBUG if reading from top level, don't want to signal EOF */
      lexeme.type = EOF_LEXEME;
      lexeme.string[0]=EOF_AS_CHAR;
      lexeme.string[1]=0;
      lexeme.length = 1;
      return;
    }
    if (current_char=='\\') {
      get_next_char();
      lexeme.string[lexeme.length++] = (char)current_char;
      get_next_char();
    } else if (current_char=='"') {
      get_next_char();
      break;
    } else {
      lexeme.string[lexeme.length++] = (char)current_char;
      get_next_char();
    }
  } while(TRUE);
  lexeme.string[lexeme.length]=0;
}

/* ======================================================================
                             Get lexeme

  This is the main routine called from outside the lexer.  It reads past 
  any whitespace, then calls some lex_xxx routine (using the lexer_routines[]
  table) based on the first character of the lexeme.
====================================================================== */

void get_lexeme (void) {
  lexeme.length = 0;
  lexeme.string[0] = 0;
  while (TRUE) {
    if (current_char==EOF_AS_CHAR) break;
    if (whitespace[(unsigned char)current_char]) {
      if ((current_char=='\n') && current_file->fake_rparen_at_eol) {
        do_fake_rparen();
        return;
      }
      get_next_char();
      continue;
    }
    if (current_char==';') {
      /* --- read from semicolon to end-of-line --- */
      while ((current_char!='\n')&&(current_char!=EOF_AS_CHAR))
        get_next_char();
      if (current_file->fake_rparen_at_eol) {
        do_fake_rparen();
        return;
      }
      if (current_char!=EOF_AS_CHAR) get_next_char();
      continue;
    }
    if (current_char=='#') {
      /* --- comments surrounded by "#|" and "|#" delimiters --- */
      record_position_of_start_of_lexeme(); /* in case of later error mesg. */
      get_next_char();
      if (current_char!='|') {
        print ("Error: '#' not followed by '|'\n");
        print_location_of_most_recent_lexeme();
        continue;
      }
      get_next_char();  /* consume the vbar */
      while (TRUE) {
        if (current_char==EOF_AS_CHAR) {
          print ("Error: '#|' without terminating '|#'\n");
          print_location_of_most_recent_lexeme();
          break;
        }
        if (current_char!='|') { get_next_char(); continue; }
        get_next_char();
        if (current_char=='#') break;
      }
      get_next_char();  /* consume the closing '#' */
      continue; /* continue outer while(TRUE), reading more whitespace */
    }
    break; /* if no whitespace, ';', or '#|' found, break out of the loop */
  }
  /* --- no more whitespace, so go get the actual lexeme --- */
  record_position_of_start_of_lexeme();
  if (current_char!=EOF_AS_CHAR)
    (*(lexer_routines[(unsigned char)current_char]))();
  else
    lex_eof();
}
  
/* ======================================================================
                            Init lexer

  This should be called before anything else in this file.  It does all 
  the necessary init stuff for the lexer, and starts the lexer reading from
  standard input.
====================================================================== */

char extra_constituents[] = "$%&*+-/:<=>?_";

void init_lexer (void) {
  int i;

  /* --- setup constituent_char array --- */
  for (i=0; i<256; i++)
    if (isalnum(i)) constituent_char[i]=TRUE; else constituent_char[i]=FALSE;
  for (i=0; i<strlen(extra_constituents); i++)
    constituent_char[extra_constituents[i]]=TRUE;
  
  /* --- setup whitespace array --- */
  for (i=0; i<256; i++)
    if (isspace(i)) whitespace[i]=TRUE; else whitespace[i]=FALSE;

  /* --- setup number_starters array --- */
  for (i=0; i<256; i++)
    if (isdigit(i)) number_starters[i]=TRUE; else number_starters[i]=FALSE;
  number_starters['+']=TRUE;
  number_starters['-']=TRUE;
  number_starters['.']=TRUE;

  /* --- setup lexer_routines array --- */
  for (i=0; i<256; i++) lexer_routines[i] = lex_unknown;
  for (i=0; i<256; i++)
    if (constituent_char[i]) lexer_routines[i] = lex_constituent_string;
  for (i=0; i<256; i++) if (isdigit(i)) lexer_routines[i] = lex_digit;
  lexer_routines['@'] = lex_at;
  lexer_routines['('] = lex_lparen;
  lexer_routines[')'] = lex_rparen;
  lexer_routines['+'] = lex_plus;
  lexer_routines['-'] = lex_minus;
  lexer_routines['~'] = lex_tilde;
  lexer_routines['^'] = lex_up_arrow;
  lexer_routines['{'] = lex_lbrace;
  lexer_routines['}'] = lex_rbrace;
  lexer_routines['!'] = lex_exclamation_point;
  lexer_routines['>'] = lex_greater;
  lexer_routines['<'] = lex_less;
  lexer_routines['='] = lex_equal;
  lexer_routines['&'] = lex_ampersand;
  lexer_routines['|'] = lex_vbar;
  lexer_routines[','] = lex_comma;
  lexer_routines['.'] = lex_period;
  lexer_routines['"'] = lex_quote;

  /* --- initially we're reading from the standard input --- */
  start_lex_from_file ("[standard input]", stdin);
}

/* ======================================================================
                   Print location of most recent lexeme

  This routine is used to print an indication of where a parser or interface
  command error occurred.  It tries to print out the current source line
  with a pointer to where the error was detected.  If the current source
  line is no longer available, it just prints out the line number instead.

  BUGBUG: if the input line contains any tabs, the pointer comes out in
  the wrong place.
====================================================================== */

void print_location_of_most_recent_lexeme (void) {
  int i;
  
  if (current_file->line_of_start_of_last_lexeme ==
      current_file->current_line) {
    /* --- error occurred on current line, so print out the line --- */
    if (! reading_from_top_level())
      print ("File %s, line %lu:\n", current_file->filename,
             current_file->current_line);
    if (current_file->buffer[strlen(current_file->buffer)-1]=='\n')
      print_string (current_file->buffer);
    else
      print ("%s\n",current_file->buffer);
    for (i=0; i<current_file->column_of_start_of_last_lexeme; i++)
      print_string ("-");
    print_string ("^\n");
  } else {
    /* --- error occurred on a previous line, so just give the position --- */
    print ("File %s, line %lu, column %lu.\n", current_file->filename,
           current_file->line_of_start_of_last_lexeme,
           current_file->column_of_start_of_last_lexeme + 1);
  }
}

/* ======================================================================
                       Parentheses Utilities

  Current_lexer_parentheses_level() returns the current level of parentheses
  nesting (0 means no open paren's have been encountered).

  Skip_ahead_to_balanced_parentheses() eats lexemes until the appropriate
  closing paren is found (0 means eat until back at the top level).
  
  Fake_rparen_at_next_end_of_line() tells the lexer to insert a fake
  R_PAREN_LEXEME token the next time it reaches the end of a line.
====================================================================== */

int current_lexer_parentheses_level (void) {
  return current_file->parentheses_level;
}

void skip_ahead_to_balanced_parentheses (int parentheses_level) {
  while (TRUE) {
    if (lexeme.type==EOF_LEXEME) return;
    if ((lexeme.type==R_PAREN_LEXEME) &&
        (parentheses_level==current_file->parentheses_level)) return;
    get_lexeme();
  }
}

void fake_rparen_at_next_end_of_line (void) {
  current_file->parentheses_level++;  
  current_file->fake_rparen_at_eol = TRUE;  
}

/* ======================================================================
                        Set lexer allow ids

  This routine should be called to tell the lexer whether to allow
  identifiers to be read.  If FALSE, things that look like identifiers
  will be returned as SYM_CONSTANT_LEXEME's instead.
====================================================================== */

void set_lexer_allow_ids (bool allow_identifiers) {
  current_file->allow_ids = allow_identifiers;
}

/* ======================================================================
               Determine possible symbol types for string

  This is a utility routine which figures out what kind(s) of symbol a 
  given string could represent.  At entry:  s, length_of_s represent the
  string.  At exit:  possible_xxx is set to TRUE/FALSE to indicate
  whether the given string could represent that kind of symbol; rereadable
  is set to TRUE indicating whether the lexer would read the given string
  as a symbol with exactly the same name (as opposed to treating it as a
  special lexeme like "+", changing upper to lower case, etc.
====================================================================== */

void determine_possible_symbol_types_for_string (char *s,
                                                 int length_of_s,
                                                 bool *possible_id,
                                                 bool *possible_var,
                                                 bool *possible_sc,
                                                 bool *possible_ic,
                                                 bool *possible_fc,
                                                 bool *rereadable) {
  char *ch;
  bool rereadability_dead, rereadability_questionable;

  *possible_id = FALSE;
  *possible_var = FALSE;
  *possible_sc = FALSE;
  *possible_ic = FALSE;
  *possible_fc = FALSE;
  *rereadable = FALSE;

  /* --- check if it's an integer or floating point number --- */
  if (number_starters[(unsigned char)(*s)]) {
    ch = s;
    if ((*ch=='+')||(*ch=='-')) ch++;  /* optional leading + or - */
    while (isdigit(*ch)) ch++;         /* string of digits */
    if ((*ch==0)&&(isdigit(*(ch-1))))
      *possible_ic = TRUE;
    if (*ch=='.') {
      ch++;                              /* decimal point */
      while (isdigit(*ch)) ch++;         /* string of digits */
      if ((*ch=='e')||(*ch=='E')) {
        ch++;                              /* E */
        if ((*ch=='+')||(*ch=='-')) ch++;  /* optional leading + or - */
        while (isdigit(*ch)) ch++;         /* string of digits */
      }
      if (*ch==0) *possible_fc = TRUE;
    }
  }

  /* --- make sure it's entirely constituent characters --- */
  for (ch=s; *ch!=0; ch++)
    if (! constituent_char[(unsigned char)(*ch)]) return;

  /* --- check for rereadability --- */
  rereadability_questionable = FALSE;
  rereadability_dead = FALSE;
  for (ch=s; *ch!=0; ch++) {
    if (islower(*ch) || isdigit(*ch)) continue; /* these guys are fine */
    if (isupper(*ch)) { rereadability_dead = TRUE; break; }
    rereadability_questionable = TRUE;
  }
  if (! rereadability_dead) {
    if ((! rereadability_questionable) ||
        (length_of_s >= LENGTH_OF_LONGEST_SPECIAL_LEXEME) ||
        ((length_of_s==1)&&(*s=='*')))
      *rereadable = TRUE;
  }

  /* --- any string of constituents could be a sym constant --- */
  *possible_sc = TRUE;
  
  /* --- check whether it's a variable --- */
  if ((*s=='<')&&(*(s+length_of_s-1)=='>')) *possible_var = TRUE;

  /* --- check if it's an identifier --- */
  if (isalpha(*s)) {
    /* --- is the rest of the string an integer? --- */
    ch = s+1;
    while (isdigit(*ch)) ch++;         /* string of digits */
    if ((*ch==0)&&(isdigit(*(ch-1)))) *possible_id = TRUE;
  }
}
