;;; Scanning

;;; ****************************************************************

;;;    make-lazy-stream : (() -> val * stream) -> stream
;;;    stream-get  : stream * (value -> (stream -> answer)) -> answer

(define make-lazy-stream 
  (lambda (th) th))

(define stream-get
  (lambda (stream rcvr)
    (let ((the-pair (stream)))
      (rcvr (car the-pair) (cdr the-pair)))))

;;; ****************************************************************

(define string->stream
  (lambda (str)
    (let ((length (string-length str)))
      (letrec 
        ((loop (lambda (i)
                 (make-lazy-stream
                   (lambda ()
                     (cons
                       (if (>= i length)
                         #\nul
                         (string-ref str i))
                       (loop (+ i 1))))))))
        (loop 0)))))

;; a better version of stream->list, parameterized on end-of-stream?
(define stream->list
  (lambda (end-of-stream? str)
    (stream-get str
      (lambda (val newstr)
        (if (end-of-stream? val) '()
          (cons val (stream->list end-of-stream? newstr)))))))

;;; ****************************************************************
;;; ****************************************************************

;;; Scanner architecture

;;; Scanner = char-stream -> token-stream

;;; State = buf * char * char-stream -> token * char * char-stream

(define-record scanner-result (item char stream))

(define apply-automaton
  (lambda (automaton state buf c str)
    (letrec 
      ((apply-state
         (lambda (state buf c str)
           (let ((opcode (car state))
                 (next-state (cadr state)))     
             '(printf "apply-state: opcode = ~s c = ~s~%" opcode c)
             (case opcode
               ((shift)
                (stream-get str
                  (lambda (c1 str1)
                    (apply-state next-state (cons c buf) c1 str1))))
               ((drop)
                (stream-get str
                  (lambda (c1 str1)
                    (apply-state next-state buf c1 str1))))
               ((emit)
                (let ((cooker (cadr state)))
                  (let ((item (apply-cooker cooker (reverse buf))))
                    (printf "emitting item ~s~n" item)
                    (make-scanner-result item c str))))
               ((cond)
                (apply-state
                  (apply-scanner-cond (cdr state) c)
                  buf c str))
               ((goto)
                (apply-state
                  (scanner-label->state next-state automaton)
                  buf c str))
               ((fail)
                (let ((msg (cadr state)))
                  (error 'apply-automaton
                    "scanner failed in state ~s on input ~s"
                    msg c)))
               ((debug-state)
                (let ((msg (cadr state))
                      (next-state (caddr state)))
                  (printf "~s ~s ~s~%" msg buf c)
                  (apply-state next-state buf c str))))))))
      (apply-state state buf c str))))  

(define apply-scanner-cond
  (lambda (alternatives c)
    (if (null? alternatives)
      (error 'apply-state
        "couldn't match character ~s in state ~%~s"
        c alternatives)
      (let ((alternative1 (car alternatives)))
        '(printf "apply-scanner-cond: c = ~s alternative = ~s~%"
          c alternative1)
        (if (apply-tester (car alternative1) c)
          (cadr alternative1)
          (apply-scanner-cond (cdr alternatives) c))))))

(define scanner-label->state
  (lambda (label automaton)
    (cadr (assq label automaton))))

(define automaton->start-label caar)

(define automaton->scanner
  (lambda (automaton)
    (letrec
      ((loop (lambda (char stream)
               (make-lazy-stream
                 (lambda ()
                   (record-case
                     (apply-automaton automaton 
                       (scanner-label->state
                         (automaton->start-label automaton) 
                         automaton)
                       '() char stream)
                     (scanner-result (token char stream)
                       (cons token
                             (loop char stream)))))))))
      (lambda (stream)
        (stream-get stream loop)))))
               
;;; driver-1:  from string to list of tokens
(define driver-1 
  (lambda (automaton string)
    (stream->list 
      (lambda (item)
	(record-case item
	  (token (class data) (eq? class 'end-marker))))
      ((automaton->scanner automaton)
       (string->stream string)))))

;;; ****************************************************************

;;; Standard cookers and testers....

;;; Record definitions and cookers

(define-record token (class data))

(define cook-identifier
  (lambda (buffer)
    (let ((sym 
            (string->symbol
              (list->string buffer))))
      (if (memq sym **keywords-list**)
        (make-token sym #t)
        (make-token 'identifier sym)))))

(define cook-number
  (lambda (buffer)
    (make-token 'number
      (string->number (list->string buffer)))))

(define apply-cooker
  (lambda (cooker char-list)
    (case cooker
      ((cook-identifier) (cook-identifier char-list))
      ((cook-number) (cook-number char-list))
      (else
	(if (symbol? cooker)
	  (make-token cooker '())
	  (else (error 'apply-cooker
		  "unknown cooker ~s" cooker)))))))

(define apply-tester
  (lambda (tester ch)
    (cond
      ((char? tester) (eq? tester ch))
      ((eq? tester 'else) #t)
      (else
	(case tester
	  ((whitespace) (char-whitespace? ch))
	  ((alphabetic) (char-alphabetic? ch))
	  ((numeric) (char-numeric? ch))
	  (else (error 'apply-tester "unknown tester ~s" tester)))))))


;;; ****************************************************************

;;; OK, let's build one now....

(define automaton-1
  '((start-state 
      (cond
        (whitespace (drop (goto start-state)))
        (alphabetic (shift (goto identifier-state)))
        (numeric (shift (goto number-state)))
        (#\+ (drop (emit plus-sym)))
        (#\: (shift (goto assign-sym-state)))
        (#\% (drop (goto comment-state)))
        (#\; (drop (emit semicolon)))
        (#\( (drop (emit lparen)))
        (#\) (drop (emit rparen)))
        (#\^ (emit end-marker))         ; alternate end marker
        (#\nul (emit end-marker))
        (else (fail start-state))))
    (identifier-state 
      (cond
        (alphabetic (shift (goto identifier-state)))
        (numeric (shift (goto identifier-state)))
        (else (emit cook-identifier))))
    (number-state 
      (cond
        (numeric (shift (goto number-state)))
        (else (emit cook-number))))
    (assign-sym-state
      (cond
        (#\= (shift (emit assign-sym)))
        (else (shift (goto identifier-state)))))
    (comment-state 
      (cond
        (#\newline (drop (goto start-state)))
        (#\^ (goto start-state))
        (#\nul (goto start-state))
        (else (drop (goto comment-state)))))))

(define **keywords-list** '(begin end if then else))

; > (driver-1 automaton-1 "abc
; def
; % comment
; xyz 13")
; emitting item (token identifier abc)
; emitting item (token identifier def)
; emitting item (token identifier xyz)
; emitting item (token number 13)
; emitting item (token end-marker ())
; ((token identifier abc)
;  (token identifier def)
;  (token identifier xyz)
;  (token number 13))

;; and here's what the answer should give:

; > (driver-2 automaton-1)
; driver-2> abc
; emitting item (token identifier abc)
; def
; emitting item (token identifier def)
; % comment
; xyz 13^
; emitting item (token identifier xyz)
; emitting item (token number 13)
; emitting item (token end-marker ())
; ((token identifier abc)
;  (token identifier def)
;  (token identifier xyz)
;  (token number 13))
; > 
