;;; parser construction

(printf "parse-utils.s Mon Apr 26 11:55:14 1993~%")

;;; Using List-of-frames representation

;;; ****************************************************************
;;; ****************************************************************
;;; ****************************************************************

;;; include scan-ds.s:

;;; scanner using data structure representations

;;; ****************************************************************

;;; streams  -- same as in functional version

(define stream-get
  (lambda (stream rcvr)
    (let ((the-pair (stream)))
      (rcvr (car the-pair) (cdr the-pair)))))

(define make-constant-stream
  (lambda (c)
    (lambda ()
      (cons c (make-constant-stream c)))))

(define stream->list
  (lambda (stream end-of-stream?)
    (stream-get stream
      (lambda (val newstream)
        (if (end-of-stream? val) '()
          (cons val (stream->list newstream end-of-stream?)))))))

(define list->stream
  (lambda (l)
    (if (null? l)
      (make-constant-stream '())
      (lambda ()
        (cons
          (car l)
          (list->stream (cdr l)))))))

(define string->stream
  (lambda (str)
    (let ((length (string-length str)))
      (letrec 
        ((chars-from (lambda (i)
                       (lambda ()
                         (cons
                           (if (>= i length)
                             #\^
                             (string-ref str i))
                           (chars-from (+ i 1)))))))
        (chars-from 0)))))

;;; ****************************************************************

;;; Data Structures

(define-record scanner-result (token char stream))
(define-record token (class data))

;;; ****************************************************************

;;; Cookers

(define cook-identifier
  (lambda (buffer)
    (let ((sym 
            (string->symbol
              (list->string buffer))))
      (if (memq sym **keywords-list**)
        (make-token sym #f)
        (make-token 'identifier sym)))))

(define cook-number
  (lambda (buffer)
    (make-token 'number
      (string->number (list->string buffer)))))

(define apply-cooker
  (lambda (cooker char-list)
    (case cooker
      ((cook-identifier) (cook-identifier char-list))
      ((cook-number) (cook-number char-list))
      (else
	(if (symbol? cooker)
	  (make-token cooker #f)        ; make delimiter token
	  (else (error 'apply-cooker
		  "unknown cooker ~s" cooker)))))))

;;; ****************************************************************

;;; Testers

(define apply-tester
  (lambda (tester ch)
    (cond
      ((char? tester) (char=? tester ch))
      ((eq? tester 'else) #t)
      (else
	(case tester
	  ((whitespace) (char-whitespace? ch))
	  ((alphabetic) (char-alphabetic? ch))
	  ((numeric) (char-numeric? ch))
	  (else (error 'apply-tester "unknown tester ~s" tester)))))))


;;; ****************************************************************

;;; Main loop

(define apply-automaton
  (lambda (automaton state buf c stream)
    (let ((opcode (car state))
          (next-state (cadr state)))     
      '(printf "apply-state: opcode = ~s c = ~s~%" opcode c)
      (case opcode
        ((shift)
         (stream-get stream
           (lambda (char1 new-stream)
             (apply-automaton automaton
               next-state (cons c buf) char1 new-stream))))
        ((drop)
         (stream-get stream
           (lambda (char1 new-stream)
             (apply-automaton automaton
               next-state buf char1 new-stream))))
        ((emit)
         (let ((cooker (cadr state)))
           (let ((token (apply-cooker cooker (reverse buf))))
             ;(printf "emitting token ~s~n" token)
             (make-scanner-result token c stream))))
        ((cond)
         (apply-automaton automaton
           (apply-scanner-cond (cdr state) c)
           buf c stream))
        ((goto)
         (apply-automaton automaton
           (cadr (assq next-state automaton))
           buf c stream))
        ((fail)
         (let ((msg (cadr state)))
           (error 'apply-automaton
             "scanner failed in state ~s on input ~s"
             msg c)))
        ((debug-state)
         (let ((msg (cadr state))
               (next-state (caddr state)))
           (printf "~s ~s ~s~%" msg buf c)
           (apply-automaton automaton
             next-state buf c stream)))
        (else
          (error 'apply-automaton
            "bad opcode in state ~s" state))))))

(define apply-scanner-cond
  (lambda (alternatives c)
    (letrec
      ((loop (lambda (alts)
               (if (null? alts)
                 (error 'apply-state
                   "couldn't match character ~s in state ~%~s"
                   c alternatives)
                 (let ((alt1 (car alts)))
;                   (printf "apply-scanner-cond: c = ~s alternative = ~s~%"
;                      c alt1)
                   (if (apply-tester (car alt1) c)
                     (cadr alt1)
                     (loop (cdr alts))))))))
      (loop alternatives))))


;;; ****************************************************************

;;; Scaffolding

(define automaton->stream-transducer
  (lambda (automaton)
    (letrec
      ((loop (lambda (char stream)
                 (let
                   ((next-result 
                      (apply-automaton automaton
                        (automaton->start-state automaton)
                        '() char stream)))
                   (let ((next-token
                           (scanner-result->token next-result))
                         (next-char
                           (scanner-result->char next-result))
                         (next-stream
                           (scanner-result->stream next-result)))
                     (cons next-token
                           (lambda ()
                             (loop next-char next-stream))))))))
      (lambda (stream)
        (lambda ()
          (stream-get stream loop))))))

(define automaton->start-state
  (lambda (automaton)
    (cadr (car automaton))))


(define driver-1 
  (lambda (automaton string)
    (stream->list
      (let ((transducer (automaton->stream-transducer automaton))
            (stream (string->stream string)))
        (transducer stream))
      (lambda (token)
        (eq? (token->class token) 'end-marker)))))

;;; ****************************************************************

;;; Example automaton

(define automaton-1
  '((start-state 
      (cond
        (whitespace (drop (goto start-state)))
        (alphabetic (shift (goto identifier-state)))
        (numeric (shift (goto number-state)))
        (#\+ (drop (emit plus-sym)))
        (#\: (shift (goto assign-sym-state)))
        (#\% (drop (goto comment-state)))
        (#\; (drop (emit semicolon)))
        (#\( (drop (emit lparen)))
        (#\) (drop (emit rparen)))
        (#\^ (emit end-marker))
        (else (fail start-state))))
    (identifier-state 
      (cond
        (alphabetic (shift (goto identifier-state)))
        (numeric (shift (goto identifier-state)))
        (else (emit cook-identifier))))
    (number-state 
      (cond
        (numeric (shift (goto number-state)))
        (else (emit cook-number))))
    (assign-sym-state
      (cond
        (#\= (shift (emit assign-sym)))
        (else (shift (goto identifier-state)))))
    (comment-state 
      (cond
        (#\newline (drop (goto start-state)))
        (#\^ (goto start-state))
        (else (drop (goto comment-state)))))))

(define **keywords-list** '())

; > (driver-1 automaton-1 "abc
; def
; % comment
; xyz 13")
; emitting token (token identifier abc)
; emitting token (token identifier def)
; emitting token (token identifier xyz)
; emitting token (token number 13)
; emitting token (token end-marker #f)
; ((token identifier abc)
;  (token identifier def)
;  (token identifier xyz)
;  (token number 13))
; > 


;;; end of scan-ds.s

;;; ****************************************************************
;;; ****************************************************************
;;; ****************************************************************



;;; string->stream and stream->list are defined in scan.s

(define token-stream-get stream-get)

;;; ****************************************************************
;;; ****************************************************************

;;; Parser architecture

;;; parser = (list tree) * item * item-stream -> tree * item * item-stream

;;; The item register can either contain an item or '() -- the latter
;;; signifying an empty buffer, to be filled when necessary.

(define-record parser-result (tree item stream))

;;; Grammar of actions:

;;; action :: = ((check/drop class) . action)
;;;             ((check/shift class) . action)
;;;             ((process/nt non-terminal) . action)
;;;             ((reduce prod-name))
;;;             ((emit-list))
;;;             ((goto non-terminal))
;;;             (cond (non-terminal action) ... (else non-terminal))
;;; 
;;; parser ::= ((non-terminal action) ...)

(define *trace-apply-parser* #f)

;;; redefine fill-token-register, parser-check to be uncurried:

(define fill-token-register
  (lambda (buf token stream action)
    (if (null? token)
      (token-stream-get stream
        (lambda (token stream)
          (action buf token stream)))
      (action buf token stream))))

(define parser-check
  (lambda (class buf token stream action)
    (fill-token-register buf token stream
      (lambda (buf token stream)
        (if (eq? class (token->class token))
          (action buf token stream)
          (error 'check
            "looking for ~s, found ~s"
            class token))))))

(define apply-parser-action
  (lambda (parser action buf token stream)
    (if *trace-apply-parser*
      (printf 
        "apply-parser-action: action = ~s~% buf = ~s token = ~s~%"
        action buf token))
    (if (eq? (car action) 'cond)
      ;; it's a cond
      (let ((alternatives (cdr action)))
        (fill-token-register buf token stream
          (lambda (buf token stream)
            (apply-parser-action parser
              (apply-parser-cond alternatives token)
              buf token stream))))
      ;; otherwise it's an ordinary instruction
      (let ((instruction (car action))
            (action (cdr action))
            (whole-action action))
        (case (car instruction)
          ((check/drop)
           (let ((class (cadr instruction)))
             (parser-check class buf token stream 
               (lambda (buf token stream)
                 (apply-parser-action parser action buf '()
                   stream)))))
          ((check/shift)
           (let ((class (cadr instruction)))
             (parser-check class buf token stream 
               (lambda (buf token stream)
                 (apply-parser-action parser action
                   (cons (token->data token) buf) '() stream)))))
          ((reduce)
           (let ((prod-name (cadr instruction)))
             '(printf "reducing ~s: buf = ~s~%" prod-name buf)
             (make-parser-result
               (apply (make-record-from-name prod-name)
                      (reverse buf))
               token
               stream)))
          ((emit-list)
           '(printf "emit-list: emitting ~s~%" (reverse buf))
           (make-parser-result
             (reverse buf)
             token
             stream))
          ((fail)
           (let ((state (cadr instruction)))
             (error 'parser
               "couldn't match token ~s in state ~s"
               token state)))
          ((parser-goto goto)
           (let ((non-terminal (cadr instruction)))
             (apply-parser-action parser
               (cadr (assq non-terminal parser))
               buf token stream)))
          ((process-nt)
           (let ((non-terminal (cadr instruction)))
             (let ((next-result
                     (apply-parser-action parser
                       (cadr (assq non-terminal parser))
                       '() token stream)))
               (record-case next-result
                 (parser-result (tree token stream)
                   (apply-parser-action parser action
                     (cons tree buf) token stream))
                 (else (error 'process-nt
                         "bad parser-result ~s~%"
                         next-result))))))
          (else
            (error 'apply-parser-action
              "unknown action ~s~%" instruction)))))))


(define apply-parser-cond
  (lambda (alternatives token)
    '(printf "apply-parser-cond: alternatives = ~s token = ~s~%"
      alternatives token)
    (if (null? alternatives)
      (error 'apply-parser-cond
        "couldn't match token ~s~%" token)
      (let ((alternative-1 (car alternatives)))
        '(printf "apply-parser-cond: token = ~s alternative = ~s~%"
          token alternative-1)
        (if (or
              (eq? (car alternative-1) (token->class token))
              ;; "else" is always true if it's last alternative
              (and (eq? (car alternative-1) 'else)
                   (null? (cdr alternatives))))
          (cadr alternative-1)
          (apply-parser-cond (cdr alternatives) token))))))

;; new parse-top-level
;;
(define parse-top-level
  (lambda (parser token-stream)
    (let ((result 
            (apply-parser-action parser
              '((goto start-state))
              '() '() token-stream)))
      '(printf "top-level parse returned.~%")
      '(pretty-print result)
      (record-case result
        (parser-result (tree token stream)
          (let ((token (if (null? token)
                        (token-stream-get stream (lambda (token stream) token))
                        token)))
            (if (eq? (token->class token) 'end-marker)
              tree
              (error 'parse-top-level
                "symbols left over: ~s..."
                token))))
        (else 
          (error 'parse-top-level
            "top-level-parse not a parser-result"))))))

;; a simpler driver
(define simple-parse-top-level
  (lambda (parser token-stream)
    (apply-parser-action
      '((goto start-state))
      '() '() token-stream)))

;;; ****************************************************************

;;; constructors for example

(define-record compound-command (command-list))
(define-record while-command (exp cmd))
(define-record if-command (exp cmd1 cmd2))
(define-record assignment-command (var exp))
(define-record variable-expression (var))
(define-record sum-expression (exp1 exp2))
(define-record end-marker-command ())

(define **keywords-list** '(begin end exit))

(define parser-2
  '((start-state
      ((goto command)))
    (command
      (cond
        (begin
          ((check/drop begin)
           (process-nt compound-command)
           (reduce compound-command)))
        (identifier
          ((check/shift identifier)
           (check/drop assign-sym)
           (process-nt expression)
           (reduce assignment)))
        (exit
          ((check/drop exit)
           (reduce end-marker-command)))
        (else
          ((fail command)))))
    (compound-command
      ((process-nt command)
       (parser-goto compound-command-loop)))
    (compound-command-loop
      (cond
        (semicolon
          ((check/drop semicolon)
           (process-nt command)
           (parser-goto compound-command-loop)))
        (end
          ((check/drop end)
           (emit-list)))
        (else
          ((fail compound-command-loop)))))
    (expression
      (cond
        (identifier
          ((check/shift identifier)
           (reduce var-expression)))
        (number
          ((check/shift number)
           (reduce const-expression)))
        (lparen
          ((check/drop lparen)
           (process-nt expression)
           (check/drop plus-sym)
           (process-nt expression)
           (check/drop rparen)
           (reduce addition-expression)))
        (else
          ((parser-fail expression)))))))

(define string->token-stream
  (lambda (state string)
    ((automaton->stream-transducer state)
     (string->stream string))))

(define test1
  (lambda (input-string)
    (parse-top-level parser-2
      (string->token-stream automaton-1 input-string))))


; > (test1 "x := y")
; (assignment x (var-expression y))
; > (test1 "begin x:=y; x := z end")
; (compound-command
;    ((assignment x (var-expression y))
;     (assignment x (var-expression z))))
; > 

