;;; Scanner Generator

;;; Source:  Book, Sec 3.1, modified for Scheme84.
;;; Modified for Chez Scheme Fri Sep 30 11:00:18 1988

;;; ******************************************************************

;;; data structure definitions

;;; for automata    

;;; automaton ::= (state*)
;;; state ::= (state-name action transition*)
;;; state-name ::= symbol
;;; action ::= expression | bogus
;;; transition ::= (input-spec state-name)
;;; input-spec ::= char | (char char)

;;; the automaton has two special states named initial, and end-marker.
;;; initial is the initial state.
;;; end-marker is a state with no transitions; its action is used to
;;; produce and end-marker item in the resulting list of values.

(define state->name car)
(define state->action cadr)
(define state->transitions caddr)

(define transition->input-spec car)
(define transition->next-state cadr)

;;; items  -- used internally to pass items and remainders around

;;; item ::= (*bogus unused-chars)
;;; item ::= (*ordinary unused-chars value)

(define make-bogus-item (lambda (chars) (list '*bogus chars)))
(define make-real-item
  (lambda (value chars) (list '*ordinary  chars value)))

(define item->unused cadr)
(define item->value caddr)

;;; lexical-item -- used externally to pass items to parser

;;; lexical-item ::= (class data)
;;; value is () for a token

(define lexical-item->class car)
(define lexical-item->data cadr)
(define make-lexical-item (lambda (class data) (list class data)))

;;; *************************************************************

;;; Top-level macro-- define-scanner

;;; the actions are evaluated at scanner-construction time, everything
;;; else is not.

(extend-syntax (define-scanner)
  [(define-scanner name
     (state-name action transition ...) ...)
   (define name
     (let ([automaton
	     (list (list 'state-name
			 (lambda (chars) (action chars)) ; action is
							 ; dynamically bound.
			 '(transition ...)) ...)])
       (set! automaton-1 automaton)
       (lambda (string) (scan-string automaton string))))])
     
;;; ****************************************************************  

;;; Top-level Function--  scan-string 

;;; scan-string:  takes an automaton, a string, and an end-marker, and
;;; returns a list of items (determined by the actions in the
;;; automaton), terminated by the end-marker item.

(define scan-string
  (lambda (automaton input-string)
    (let loop ([index 0])
      (if (>= index (string-length input-string))
	(list (end-marker-action automaton))
	(let ([item (next-item-raw automaton input-string index)])
	  ;;(printf "scan-string: item = ~s~%" item)
	  (case (car item)
	    [(*bogus) (loop (item->unused item))]
	    [(*ordinary)
	     (cons (item->value item)
		   (loop (item->unused item)))]))))))

;;;**********************************************************************

;;; Simulate Automaton

;;; Given a list, a predicate, and a failure value, return the first
;;; element in the list that satisfies the predicate.  If no such
;;; element exists, return #f

(define linear-search
  (lambda (l predicate)
    (cond
      [(null? l) #f]
      [(predicate (car l)) (car l)]
      [else (linear-search (cdr l) predicate)])))

(define find-state
  (lambda (automaton state-name)
    (linear-search
      automaton
      (lambda (s) (eq? (state->name s) state-name)))))

;;; Given a state and an input-char, find the next state-name if any.
;;; Otherwise return #f. 

(define find-transition
  (lambda (state input-char)
    '(printf "find-transition: state = ~s input-char = ~s~%"
       state input-char)
    (linear-search
      (state->transitions state)
      (lambda (transition)
	'(printf "transition: ~s input-char: ~s~%" transition input-char)
	(let ([input-spec (transition->input-spec transition)])
	  '(printf "find-transition: input-spec = ~s input-char = ~s~%"
		 input-spec input-char)
	       (cond
		 ((char? input-spec)	
		  ;; it's a single character:
		  (eqv? input-char input-spec))
		 ;; no, it's a subrange
		 (else
		   (let ((ans (char<=?
				(car input-spec)
				input-char
				(cadr input-spec))))
		     ans))))))))


;;; simulate automaton and collect characters.

(define next-item-raw
  (lambda (automaton input-string index)
    (let main-loop
      ([state-name 'initial][char-buffer '()][index index])
      (let ([state (find-state automaton state-name)])
	'(printf "next-item-raw: state-name = ~s state = ~s~%"
	  state-name state)
	(cond
	  [(null? state)
	   ;; bad state-name
	   (error 'next-item-raw "can't find state named ~s" state-name)]
	  [(>= index (string-length input-string))
	   ;; this is the end of the string, so it must be the of
	   ;; the item.
	   '(printf "next-item-raw: found end of string in state ~s~%"
	      state-name)
	   (make-item 
	     (state->action state)
	     (reverse char-buffer)
	     index)]
	  [else
	    (let* ([char (string-ref input-string index)]
		   [transition
		     (find-transition state char)])
	      '(printf "state-name: ~s char: ~s~%" state-name char)
	      '(printf "transition: ~s~%" transition)
	      (cond
		;; if we've found the next transition, accumulate the
		;; character in the char-buffer, and go on to the next
		;; character:
		[(not (eq? transition #f))
		 (main-loop
		   (transition->next-state transition)
		   (cons char char-buffer)
		   (+ index 1))]
		;; no transition applies, so we must be looking at
		;; the first character of the next item
		[else
		  (make-item
		    (state->action state)
		    (reverse char-buffer)
		    index)]))])))))

(define make-item
  (lambda (action buffer unused)
    (let ((value (action buffer)))
      (if (eq? value 'bogus)
	(make-bogus-item unused)
	(make-real-item value unused)))))


(define end-marker-action
  (lambda (automaton)
    (let ([state (find-state automaton 'end-marker)])
      (if state
	((state->action state) '())
	(error 'end-marker-action
	    "can't find state named end-marker")))))

;;; **********************************************************************

;;; Example:

(define cook-symbol
  (lambda (chars)
    (let ([sym (char-list->symbol chars)])
      (if (memq sym **keywords-list**)
             (make-lexical-item sym '())
             (make-lexical-item 'symbol sym))))) ; this will change
						  ; to symbol eventually

(define **keywords-list** '(let in if then else set!))

(define cook-bogus
  (lambda (chars) 'bogus))

(define cook-number
  (lambda (chars)
    (make-lexical-item 'int (digit-list->number chars))))

(define lex-error
  (lambda (chars)
    (error 'lex-error "unknown lexical item ~s" chars)))

(define cook-token
  (lambda (token)
    (lambda (chars)
      (make-lexical-item token '()))))

(define char-list->symbol 
  (lambda (char-list) (string->symbol (list->string char-list))))

;;; digit-list->number converts a list of decimal digits to a number.
;;; It uses the fact that the ASCII characters for 0..9 are
;;; consecutive, starting with 0.

(define digit-list->number
  (let ([code-for-zero (char->integer #\0)])
    (lambda (char-list)
      (letrec
       ([loop (lambda (accum char-list)
                (if (null? char-list)
                    accum
                    (loop
                     (+ (* 10 accum)
                        (- (char->integer (car char-list))
                           code-for-zero))
                     (cdr char-list))))])
       (loop 0 char-list)))))



;;; *******************************************************************

;;; Reasonable lexical spec:

(define-scanner scanner-1
  (initial lex-error
    (#\space whitespace)
    (#\tab  whitespace)
    (#\newline whitespace)
    ((#\a #\z) symbol)
    ((#\A #\Z) symbol)
    (#\, comma)
    ((#\* #\/) symbol)		; except "," 
    (#\= symbol)
    (#\( lparen)
    (#\) rparen)
    (#\% comment)
    (#\; semicolon)
    ((#\0 #\9) number))
  (symbol cook-symbol
    ((#\a #\z) symbol)
    ((#\A #\Z) symbol)
    ((#\0 #\9) symbol)
    (#\* symbol)
    (#\+ symbol)
    (#\- symbol)
    (#\/ symbol)
    (#\! symbol)
    (#\= symbol))
  (comment cook-bogus
    ((#\space #\~) comment))
  (whitespace cook-bogus
    (#\space whitespace)
    (#\tab whitespace)
    (#\newline whitespace))
  (number cook-number
    ((#\0 #\9)number))
  (comma (cook-token 'comma))
  (lparen (cook-token 'lparen))
  (rparen (cook-token 'rparen))
  (semicolon (cook-token 'semicolon))
  (end-marker (cook-token 'end-marker)))





