;;; Written by William M. Wells.  This is an example lr parser driver
;;; which uses parse table files generated by Zebu.  


(declare (usual-integrations))

;;;
;;; A rudimentary lr parser driver.
;;; It has provisions for applying client supplied procedures which are
;;; associated with productions in the grammar.
;;;
;;;
;;; This code is independent of the parse table generating system,
;;; and basically stand alone,  although
;;; it needs some macros defined in other files.
;;;
;;; The following data structures are loaded from a parse table file by the 
;;; function which follows.
;;;
;;; lexicon is a vector of strings or lisp symbols , indexed by the 
;;; "grammar symbol indices",  which are the instantiations of
;;; the grammar symbols.
;;;
;;; terminal-indices is a list of the grammar symbol indices indicating
;;; which among them are terminal symbols.
;;;
;;; production-info is a vector, indexed by the production indices.
;;; Each item is a two element list: the first elements index the symbols
;;; which are the lhs of the productions, the second elements indicate the
;;; lengths of the productions.
;;;
;;; action-table is a vector indexed by the state indices.
;;; Each state's entry is a vector whose elements represent
;;; defined entries in the action parsing function. These entries are 3 element
;;; lists whose first elements are the indices of the grammar symbol argument
;;; to the action parsing function.  The second elements in the lists are an
;;; encoding of the action function: 's for shift, 'r for reduce, 'a for
;;; accept.  The third elements are production or next state indices as
;;; approprite.  The three element lists appear in their surrounding
;;; vectors sorted on their cars.
;;;
;;; goto-table is arranged similar to action-table but has two element
;;; lists instead of three.  The second element of each list are the
;;; index of the state to goto.
;;; 
;;; end-symbol-index holds the index of the end symbol.
;;;
;;; terminal-alist associates terminal symbol instantiations with
;;; their indices.
;;;
;;; client-lambdas are a vector of procedures, indexed by production index,
;;; which correspond to productions in the grammar.  The client lambdas are 
;;; what the parser calls to do syntax directed something by side effect.

(define lexicon)
(define terminal-indices)
(define production-info)
(define action-table)
(define goto-table)
(define lr-parser-start-state-index)
(define end-symbol-index)
(define terminal-alist)
(define client-lambdas)

;;; Read in a set of parse tables as written by (dump-tables) .

(define (load-parse-tables filename)
  (call-with-input-file filename
    (lambda (port)
      (set! lexicon (list->vector (read port)))
      (set! terminal-indices (read port))
      (set! production-info (list->vector (read port)))
      (set! action-table
            (list->vector (map list->vector (read port))))
      (set! goto-table
            (list->vector (map list->vector (read port))))
      (set! lr-parser-start-state-index (read port))
      (set! end-symbol-index (read port))
      (set! client-lambdas 
            (list->vector (cons 'place-holder (map evaluate (read port)))))))
  (set! terminal-alist 
        (map (lambda (index) (cons (vector-ref lexicon index) index))
             terminal-indices)))

;;; one-arg-eval is a macro whose definition varies with the
;;; scheme implementation.  (Wrapped, since we can't map macros...)

(define (evaluate form)
  (one-arg-eval form))



;;; The lr parser itself.
;;; symbol-stack and state-stack are the standard things for an lr parser.
;;; the client lambdas and stack are used in the following fashion:
;;;
;;;   When a shift action occurs, the instantiation of the input symbol
;;;   is pushed onto the client stack.
;;;
;;;   When a reduce action occurs, as many items as are on the lhs
;;;   of the associated production are popped from the client stack
;;;   and the corresponding client lambda is applied to the popped
;;;   items.  The result of the application is then pushed onto the 
;;;   client stack.  One may of course do whatever one wishes by side
;;;   effect.



(define (lr-parse next-sym-fn err-fn)
  (let 
    ((symbol-stack '())
     (state-stack (list lr-parser-start-state-index))
     (client-stack '()))
    (do ((input-symbol-stuff (car (next-sym-fn)))
         (done #f))
        (done (car client-stack))
        (assert (not (null? input-symbol-stuff))
                "internal parser error: out of symbols")
        (let* 
          ((input-symbol-index (cdr input-symbol-stuff))
           (input-symbol-instantiation (car input-symbol-stuff))
           (action-entry (vec-bs-assoc input-symbol-index
                                       (vector-ref action-table
                                                   (car state-stack)))))
          (if (null? action-entry) 
              (err-fn "syntax error (action not defined)"))
          (cond ((eq? (cadr action-entry) 's)	
                 ;; Shift.
                 ;; (newline) (display "shift")
                 (push input-symbol-index symbol-stack)
                 (push (caddr action-entry) state-stack)
                 (push input-symbol-instantiation client-stack)
                 (set! input-symbol-stuff (car (next-sym-fn))))
                ((eq? (cadr action-entry) 'r)	
                 ;; Reduce.
                 (let* ((prod-index (caddr action-entry))
                        (prod-len (cadr (vector-ref production-info
                                                    prod-index))))
                   ;; (newline) (display "reduce ") (display prod-index)
                   (popn prod-len symbol-stack)
                   (popn prod-len state-stack)
                   (push (car (vector-ref production-info prod-index))
                         symbol-stack)	; Push lhs of production.
                   (let ((goto (cadr (vec-bs-assoc (car symbol-stack)
                                                   (vector-ref
                                                     goto-table
                                                     (car state-stack))))))
                     (if (null? goto) 
                         (err-fn "table error? goto not defined!"))
                     (push goto state-stack))
                   ;; Apply the client lambda and store the result.
                   ;; Yikes! had hairy order of evaluation problem with cons...
                   ;; Gee, side-effects are dangerous...
                   (let ((args (popn prod-len client-stack)))
                     (push (apply (vector-ref client-lambdas prod-index) 
                                  args)
                           client-stack))))
                ;; Accept on end symbol.
                ((eq? (cadr action-entry) 'a)	
                 (if (= input-symbol-index end-symbol-index) (set! done #t)
                     (err-fn "extra input?")))
                (else (error "bogus action" (cadr action-entry))))))))



;;; A function for looking up table entries using binary search
;;; the vector elements are the assoc key and should be in increasing order.

(define (vec-bs-assoc num vec)
  (let ((last (- (vector-length vec) 1)))
    (if (or (< num (car (vector-ref vec 0)))
	    (> num (car (vector-ref vec last))))
	#f
	(vec-bs-assoc-aux num vec 0 last))))

(define (vec-bs-assoc-aux num vec start end)
  (cond ((= num (car (vector-ref vec start)))
	 (vector-ref vec start))
	((= start end) #f)
	(else (let ((mid (floor (/ (+ start end) 2))))
	     (if (> num (car (vector-ref vec mid)))
		 (vec-bs-assoc-aux num vec (+ 1 mid) end)
		 (vec-bs-assoc-aux num vec start mid))))))


;;; Figure out to which element of the lexicon a token corresponds.
;;; This gets a little complicated for terminal symbols which can
;;; vary a parsing time, for example, identifiers and numbers.  The way
;;; these "preterminals" are handled in this driver is as follows:
;;; If a token passes the scheme test NUMBER?, and the argument number-index
;;; isn't false, then number-index is treated as representing its category.
;;; Otherwise, if the token appears exactly in the lexicon, then it is
;;; given the category of the lexicon item.  Othewise it is assumed
;;; to be an instance of the terminal IDENTIFIER, whose presence in the
;;; lexicon is indicated by a non false value for the id-index argument.
;;; If the token isn't explicitly in the lexicon, and id-index is false,
;;; then an error is signalled.
;;; 


;;; number-index should be the index of the grammar symbol which stands
;;; for numbers, otherwise it should be false if numbers don't appear
;;; in the grammar.
;;;
;;; id-index should be the index of the grammar symbol which stands
;;; for identifiers, otherwise it should be false if identifiers don't
;;; appear in the grammar.


(define (categorize token number-index id-index)
  (let 
    ((category 
       (if (number? token)
           (begin (assert number-index "A number was seen in the token stream")
                  number-index)
           (let ((terminal-association (assoc token terminal-alist)))
             (if terminal-association 
                 (cdr terminal-association)
                 (begin 
                   (assert id-index 
                           "A token was seen which isn't in the grammar")
                   id-index))))))
    `((,token . ,category))))



;;; This implements a parser which gets its tokens from the supplied list.
;;; It uses the parsing engine lr-parse which is defined above.  It also
;;; uses the function categorize to classify tokens according to the 
;;; lexicon.

(define (list-parser token-list)
  (let* ((number-association (assoc 'number terminal-alist))
         (number-index (if number-association (cdr number-association) #f))
         (identifier-association (assoc 'identifier terminal-alist))
         (identifier-index (if identifier-association
                               (cdr identifier-association) #f)))
    (lr-parse
     ;; This lambda is the tokenizer supplied to the parsing engine:
     (lambda ()
       (if (null? token-list)
	   `((() . ,end-symbol-index))
	   (categorize (pop token-list) number-index identifier-index)))
     ;; This lambda is the error function supplied to the parsing engine:
     (lambda (string)
       (display "Syntax error. Remaining tokens: ")
       (error string token-list)))))
    

;;; This implements a parser which gets its tokens from the scheme function
;;; read.
;;; It uses the parsing engine lr-parse which is defined above.  It also
;;; uses the function categorize to classify tokens according to the 
;;; lexicon.  It will signal the end of input to the parser when it
;;; sees the symbol ':eof in the input stream, or if it read end of file.

(define (read-parser)
  (let* ((number-association (assoc 'number terminal-alist))
         (number-index (if number-association (cdr number-association) #f))
         (identifier-association (assoc 'identifier terminal-alist))
         (identifier-index (if identifier-association
                               (cdr identifier-association) #f))
         (token 'bogus))
    (lr-parse
     ;; This lambda is the tokenizer supplied to the parsing engine:
      (lambda ()
        (set! token (read))
        (if (or (eof-object? token) (eq? token ':eof))
            `((() . ,end-symbol-index))
            (categorize token number-index identifier-index)))
     ;; This lambda is the error function supplied to the parsing engine:
      (lambda (string)
        (display "Syntax error. Last token read: ") (display token)
        (error string token)))))



;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; test:
(comment-out
  (load "pc-scheme.scm")
  (load "macros.scm")
  (load "driver.scm")

  (compile-lalr1-grammar "ex1.grm" "ex1.tab")
  (load-parse-tables "ex1.tab")
  (load-lambdas "ex1.grm")
  (list-parser '(ned "+" Jed))
  )


;;; PC scheme requires a control-Z at the end of each source file: 
