;;; scanner using data structure representations

;;; ****************************************************************

;;; streams  -- same as in functional version

(define stream-get
  (lambda (stream rcvr)
    (let ((the-pair (stream)))
      (rcvr (car the-pair) (cdr the-pair)))))

(define make-constant-stream
  (lambda (c)
    (lambda ()
      (cons c (make-constant-stream c)))))

(define stream->list
  (lambda (stream end-of-stream?)
    (stream-get stream
      (lambda (val newstream)
        (if (end-of-stream? val) '()
          (cons val (stream->list newstream end-of-stream?)))))))

(define list->stream
  (lambda (l)
    (if (null? l)
      (make-constant-stream '())
      (lambda ()
        (cons
          (car l)
          (list->stream (cdr l)))))))

(define string->stream
  (lambda (str)
    (let ((length (string-length str)))
      (letrec 
        ((chars-from (lambda (i)
                       (lambda ()
                         (cons
                           (if (>= i length)
                             #\^
                             (string-ref str i))
                           (chars-from (+ i 1)))))))
        (chars-from 0)))))

;;; ****************************************************************

;;; Data Structures

(define-record scanner-result (token char stream))
(define-record token (class data))

;;; ****************************************************************

;;; Cookers

(define cook-identifier
  (lambda (buffer)
    (let ((sym 
            (string->symbol
              (list->string buffer))))
      (if (memq sym **keywords-list**)
        (make-token sym #f)
        (make-token 'identifier sym)))))

(define cook-number
  (lambda (buffer)
    (make-token 'number
      (string->number (list->string buffer)))))

(define apply-cooker
  (lambda (cooker char-list)
    (case cooker
      ((cook-identifier) (cook-identifier char-list))
      ((cook-number) (cook-number char-list))
      (else
	(if (symbol? cooker)
	  (make-token cooker #f)        ; make delimiter token
	  (else (error 'apply-cooker
		  "unknown cooker ~s" cooker)))))))

;;; ****************************************************************

;;; Testers

(define apply-tester
  (lambda (tester ch)
    (cond
      ((char? tester) (char=? tester ch))
      ((eq? tester 'else) #t)
      (else
	(case tester
	  ((whitespace) (char-whitespace? ch))
	  ((alphabetic) (char-alphabetic? ch))
	  ((numeric) (char-numeric? ch))
	  (else (error 'apply-tester "unknown tester ~s" tester)))))))


;;; ****************************************************************

;;; Main loop

(define apply-automaton
  (lambda (automaton state buf c stream)
    (let ((opcode (car state))
          (next-state (cadr state)))     
      '(printf "apply-state: opcode = ~s c = ~s~%" opcode c)
      (case opcode
        ((shift)
         (stream-get stream
           (lambda (char1 new-stream)
             (apply-automaton automaton
               next-state (cons c buf) char1 new-stream))))
        ((drop)
         (stream-get stream
           (lambda (char1 new-stream)
             (apply-automaton automaton
               next-state buf char1 new-stream))))
        ((emit)
         (let ((cooker (cadr state)))
           (let ((token (apply-cooker cooker (reverse buf))))
             (printf "emitting token ~s~n" token)
             (make-scanner-result token c stream))))
        ((cond)
         (apply-automaton automaton
           (apply-scanner-cond (cdr state) c)
           buf c stream))
        ((goto)
         (apply-automaton automaton
           (cadr (assq next-state automaton))
           buf c stream))
        ((fail)
         (let ((msg (cadr state)))
           (error 'apply-automaton
             "scanner failed in state ~s on input ~s"
             msg c)))
        ((debug-state)
         (let ((msg (cadr state))
               (next-state (caddr state)))
           (printf "~s ~s ~s~%" msg buf c)
           (apply-automaton automaton
             next-state buf c stream)))
        (else
          (error 'apply-automaton
            "bad opcode in state ~s" state))))))

(define apply-scanner-cond
  (lambda (alternatives c)
    (letrec
      ((loop (lambda (alts)
               (if (null? alts)
                 (error 'apply-state
                   "couldn't match character ~s in state ~%~s"
                   c alternatives)
                 (let ((alt1 (car alts)))
;                   (printf "apply-scanner-cond: c = ~s alternative = ~s~%"
;                      c alt1)
                   (if (apply-tester (car alt1) c)
                     (cadr alt1)
                     (loop (cdr alts))))))))
      (loop alternatives))))


;;; ****************************************************************

;;; Scaffolding

(define automaton->stream-transducer
  (lambda (automaton)
    (letrec
      ((loop (lambda (char stream)
                 (let
                   ((next-result 
                      (apply-automaton automaton
                        (automaton->start-state automaton)
                        '() char stream)))
                   (let ((next-token
                           (scanner-result->token next-result))
                         (next-char
                           (scanner-result->char next-result))
                         (next-stream
                           (scanner-result->stream next-result)))
                     (cons next-token
                           (lambda ()
                             (loop next-char next-stream))))))))
      (lambda (stream)
        (lambda ()
          (stream-get stream loop))))))

(define automaton->start-state
  (lambda (automaton)
    (cadr (car automaton))))


(define driver-1 
  (lambda (automaton string)
    (stream->list
      (let ((transducer (automaton->stream-transducer automaton))
            (stream (string->stream string)))
        (transducer stream))
      (lambda (token)
        (eq? (token->class token) 'end-marker)))))

;;; ****************************************************************

;;; Example automaton

(define automaton-1
  '((start-state 
      (cond
        (whitespace (drop (goto start-state)))
        (alphabetic (shift (goto identifier-state)))
        (numeric (shift (goto number-state)))
        (#\+ (drop (emit plus-sym)))
        (#\: (shift (goto assign-sym-state)))
        (#\% (drop (goto comment-state)))
        (#\; (drop (emit semicolon)))
        (#\( (drop (emit lparen)))
        (#\) (drop (emit rparen)))
        (#\^ (emit end-marker))
        (else (fail start-state))))
    (identifier-state 
      (cond
        (alphabetic (shift (goto identifier-state)))
        (numeric (shift (goto identifier-state)))
        (else (emit cook-identifier))))
    (number-state 
      (cond
        (numeric (shift (goto number-state)))
        (else (emit cook-number))))
    (assign-sym-state
      (cond
        (#\= (shift (emit assign-sym)))
        (else (shift (goto identifier-state)))))
    (comment-state 
      (cond
        (#\newline (drop (goto start-state)))
        (#\^ (goto start-state))
        (else (drop (goto comment-state)))))))

(define **keywords-list** '())

; > (driver-1 automaton-1 "abc
; def
; % comment
; xyz 13")
; emitting token (token identifier abc)
; emitting token (token identifier def)
; emitting token (token identifier xyz)
; emitting token (token number 13)
; emitting token (token end-marker #f)
; ((token identifier abc)
;  (token identifier def)
;  (token identifier xyz)
;  (token number 13))
; > 


