// Lexer.java

// Date: 3/1/98
// Author: Adam Berger
// Description: Lexer routines for SL project 

import java.io.*;


public class Lexer implements TokenTypes {
  
  // state names of the finite-state machine accepting strings of SL lexemes
  private static final int S_START         = 0;
  private static final int S_IDENTIFIER    = 1;
  private static final int S_LETTER        = 2;
  private static final int S_DIGIT         = 3;
  private static final int S_LT_UNRESOLVED = 4; // seen <
  private static final int S_GT_UNRESOLVED = 5; // seen >
  private static final int S_EQ_UNRESOLVED = 6; // seen =
  private static final int S_EX_UNRESOLVED = 7; // seen !

  // equivalence classes of characters, for use in FSM 'switch' clause below 
  private static final int C_SPACE      = 0;   // space, tab, newline, etc.
  private static final int C_ATOMIC     = 1;   // defined in TokenTypes
  private static final int C_LETTER     = 2;   
  private static final int C_DIGIT      = 3;  
  private static final int C_LT         = 4;   // <
  private static final int C_GT         = 5;   // >
  private static final int C_EQ         = 6;   // =
  private static final int C_EX         = 7;   // !
  
  private int         state;          // current state of FSM 
  private int         lastChar, nextChar;            
  private int         currentLine;
  private SymbolTable st; 
  private Token       pushedBackToken; 
  private boolean     pushedBack;
  private InputStreamReader isReader;
  private boolean     qEOF;

  Lexer(InputStream is) {
    isReader = new InputStreamReader(is);
    st = new SymbolTable();
    state=S_START;
    qEOF=false;
    nextChar=-1;
    currentLine=1;
    boolean pushedBack=false;
  }

  public void putBack(Token t) {
    pushedBackToken = t;
    pushedBack=true;
  }

  public Token getToken() throws LexerException {
    Token t;
    if (pushedBack==true) {
      pushedBack=false;
      return pushedBackToken;
    }
    t=internalGetToken(isReader);
    return t;
  }

  private Token internalGetToken(InputStreamReader isReader) throws LexerException {
    String lexeme="";         // gradually built up as characters are read

    for (boolean done=false; !done; ) {
      
      // need another character? If so, read from stdin.
      if (nextChar==-1) {
	try {
	  nextChar = isReader.read();
	}
	catch (java.io.IOException e) {
	  throw new LexerException(currentLine);
	}

	if (nextChar=='\n') currentLine++;
	if (nextChar==-1) {
	  // Reached end of file. 
	  qEOF=true;
	  if (lexeme=="") return new Token (T_EOF);
	  else {
	    // flush the current lexeme.
	    st.insertLexeme(lexeme); 
	    return new Token(st.tokenOfLexeme(lexeme), lexeme);
	  }
	}
      }
      
      int charType = typeOfChar((char) nextChar);
      
      // transition to new state based on next character in input stream 
      // The FSM is encoded in the following nested switch statements: the outer switch
      // is the current state, and the inner switches are the symbol read.
      switch(state) {
	
      case S_START:
	switch(charType) {
	case C_SPACE:
	  break;
	case C_ATOMIC:
	  lexeme+=(char) nextChar;
	  done=true;
	  break;
	default:
	  state=charType;
	  lexeme+=(char) nextChar;
	  lastChar=nextChar;
	}
	nextChar=-1;
	break;

      case S_LETTER:
	switch(charType) {
	case C_LETTER:
	  lexeme+=(char)nextChar;
	  nextChar=-1;
	  break;
	case C_DIGIT:
	  state=S_IDENTIFIER;
	  lexeme+=(char)nextChar;
	  nextChar=-1;
	  break;
	default:
	  done=true;
	  state=S_START;
	}
	break;

      case S_DIGIT:
	switch(charType) {
	case C_DIGIT:
	  lexeme+=(char)nextChar;
	  nextChar=-1;
	  break;
	case C_LETTER:
	  throw new LexerException(currentLine);	  
	default:
	  done=true;
	  state=S_START;
	}
	break;
	
      case S_IDENTIFIER:
	switch(charType) {
	case C_DIGIT:
	case C_LETTER:
	  lexeme+=(char)nextChar;
	  nextChar=-1;
	  break;
	default:
	  done=true;
	  state=S_START;
	}
	break;
	
      case S_LT_UNRESOLVED:
      case S_GT_UNRESOLVED:
      case S_EQ_UNRESOLVED:
      case S_EX_UNRESOLVED:
	if (nextChar=='=') {
	  lexeme+=(char)nextChar;
	  nextChar=-1;
	}
	done=true;
	state=S_START;
	break;
	
      default: throw new Error("internal lexer error");
      }
    }  
    st.insertLexeme(lexeme); // idempotent if lexeme already there
    return new Token(st.tokenOfLexeme(lexeme), lexeme);
  }
  
  public SymbolTable getSymbolTable() { return st; }  

  public int getCurrentLine() { return currentLine; }

  private int typeOfChar(char c) throws LexerException {
    if (Character.isLetter(c)) return C_LETTER;
    if (Character.isDigit(c))  return C_DIGIT;
    if (c=='>') return C_GT;
    if (c=='<') return C_LT;
    if (c=='=') return C_EQ;
    if (c=='!') return C_EX;
    String s = (new Character(c)).toString();
    if (memberOfList(s, atomicCharacters)) return C_ATOMIC;
    if (memberOfList(s, spaces)) return C_SPACE;
    throw new LexerException(currentLine);
  }

  private boolean memberOfList(String s, String [] list) {
    for (int i=0; i<list.length; i++) 
      if (s.equals(list[i])) return true;
    return false;
  }
}
